Merge pull request #29798 from taosdata/fix/TD-33738-MAIN

fix: add case taosdumpComp.py and del deps/avro/lang/other files - main
This commit is contained in:
Linhe Huo 2025-02-19 13:58:38 +08:00 committed by GitHub
commit f7a4c29c50
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1486 changed files with 137 additions and 265936 deletions

View File

@ -0,0 +1,8 @@
#!server_ver: ver:3.1.0.0
#!taosdump_ver: 2.5.2_cf16c4d
#!os_id: LINUX
#!escape_char: true
#!loose_mode: false
#!charset: UTF-8
#!dumpdb: test:

View File

@ -0,0 +1,10 @@
#!server_ver: ver:3.1.0.0
#!taosdump_ver: 2.5.2_cf16c4d
#!os_id: LINUX
#!escape_char: true
#!loose_mode: false
#!charset: UTF-8
CREATE DATABASE IF NOT EXISTS test REPLICA 1 DURATION 14400m KEEP 5256000m,5256000m,5256000m PRECISION 'ms' MINROWS 100 MAXROWS 4096 COMP 2 ;
CREATE TABLE IF NOT EXISTS test.`meters`(`ts` timestamp,`bc` bool,`fc` float,`dc` double,`ti` tinyint,`si` smallint,`ic` int,`bi` bigint,`uti` tinyint unsigned,`usi` smallint unsigned,`ui` int unsigned,`ubi` bigint unsigned,`bin` binary(4),`nch` nchar(8)) TAGS(`tbc` bool,`tfc` float,`tdc` double,`tti` tinyint,`tsi` smallint,`tic` int,`tbi` bigint,`tuti` tinyint unsigned,`tusi` smallint unsigned,`tui` int unsigned,`tubi` bigint unsigned,`tbin` binary(4),`tnch` nchar(8));

View File

@ -0,0 +1,118 @@
###################################################################
# Copyright (c) 2016 by TAOS Technologies, Inc.
# All rights reserved.
#
# This file is proprietary and confidential to TAOS Technologies.
# No part of this file may be reproduced, stored, transmitted,
# disclosed or used in any form or by any means other than as
# expressly provided by the written permission from Jianhui Tao
#
###################################################################
# -*- coding: utf-8 -*-
import os
import json
import frame
import frame.etool
from frame.log import *
from frame.cases import *
from frame.sql import *
from frame.caseBase import *
from frame import *
class TDTestCase(TBase):
    """Compatibility case: import a stored taosdump archive and verify its content.

    The archive lives under ``./tools/taosdump/native/compa``; after the
    import, a fixed set of aggregate queries on ``test.meters`` is compared
    against known-good values.
    """

    def caseDescription(self):
        """
        test taosdump compatible with import data coming from v3.1.0.0
        """

    def exec(self, command):
        """Log ``command``, run it through the system shell and return its status.

        The value is whatever ``os.system`` reports; zero means success.
        """
        tdLog.info(command)
        return os.system(command)

    def findPrograme(self):
        """Locate the taosdump binary and prepare an empty ``./tmp`` directory.

        Returns:
            tuple: ``(taosdump, tmpdir)`` - the path reported by
            ``etool.taosDumpFile()`` and the scratch directory path.
        """
        # Imported locally so this block does not depend on a file-level import.
        import shutil

        # taosdump binary
        taosdump = etool.taosDumpFile()
        if not taosdump:
            tdLog.exit("taosdump not found!")
        else:
            tdLog.info("taosdump found in %s" % taosdump)

        # Scratch directory: wipe leftovers from a previous run. shutil is used
        # instead of shelling out to `rm -rf`, which is unavailable on Windows
        # even though this case is registered for Windows as well.
        tmpdir = "./tmp"
        if os.path.exists(tmpdir):
            tdLog.info("directory exists")
            shutil.rmtree(tmpdir, ignore_errors=True)
        os.makedirs(tmpdir, exist_ok=True)

        return taosdump, tmpdir

    def dumpIn(self, taosdump, indir):
        """Import the dump stored in ``indir`` by running ``taosdump -i``.

        A non-zero exit status is reported through ``tdLog.exit`` so that a
        failed import is not mistaken for a data mismatch later on.
        """
        status = self.exec(f'{taosdump} -i {indir}')
        if status != 0:
            tdLog.exit(f"taosdump import from {indir} failed with status {status}")

    def checkSame(self, db, stb, aggfun, expect):
        """Run ``select <aggfun> from <db>.<stb>`` and check the first cell.

        Args:
            db: database name.
            stb: super table name.
            aggfun: aggregate expression placed in the select list.
            expect: value expected at row 0, column 0 of the result.
        """
        sql = f"select {aggfun} from {db}.{stb}"
        tdSql.query(sql)
        tdSql.checkData(0, 0, expect, show=True)

    def verifyResult(self, db):
        """Compare aggregates of ``<db>.meters`` with the expected values."""
        stb = "meters"
        # (aggregate expression, expected value), checked in this order:
        # row count, then data columns, then tag columns.
        expectations = (
            ("count(ts)", 5000),
            ("last(ts)", "2023-11-15 07:36:39"),
            ("last(bc)", False),
            ("sum(fc)", 2468.910999777726829),
            ("sum(dc)", 24811.172123999996984),
            ("sum(ti)", -411),
            ("sum(si)", 117073),
            ("sum(ic)", -39181),
            ("sum(bi)", -2231976),
            ("sum(uti)", 248825),
            ("sum(usi)", 248333),
            ("sum(ui)", 2484501),
            ("sum(ubi)", 25051956),
            ("last(bin)", "kwax"),
            ("last(nch)", "0cYzPVcV"),
            ("sum(tfc)", 3420.000076293945312),
            ("sum(tdc)", 3020.234999999780030),
            ("sum(tti)", -100000),
            ("sum(tsi)", -85000),
            ("sum(tic)", -4795000),
            ("sum(tbi)", -1125000),
            ("sum(tuti)", 475000),
            ("sum(tusi)", 460000),
            ("sum(tui)", 520000),
            ("sum(tubi)", 43155000),
            ("last(tbin)", "ywkc"),
            ("last(tnch)", "kEoWzCBj"),
        )
        for aggfun, expect in expectations:
            self.checkSame(db, stb, aggfun, expect)

    def run(self):
        """Import the stored dump and verify the resulting database."""
        # database created by the dump
        db = "test"

        # locate taosdump; the scratch directory is prepared but not used here
        taosdump, _tmpdir = self.findPrograme()
        data = "./tools/taosdump/native/compa"

        # dump in
        self.dumpIn(taosdump, data)

        # verify db
        self.verifyResult(db)

    def stop(self):
        """Close the SQL connection and log that the case succeeded."""
        tdSql.close()
        tdLog.success("%s successfully executed" % __file__)
# Add this case to tdCases for both Windows and Linux; each call receives
# its own TDTestCase instance.
tdCases.addWindows(__file__, TDTestCase())
tdCases.addLinux(__file__, TDTestCase())

View File

@ -144,6 +144,7 @@
,,y,army,./pytest.sh python3 ./test.py -f tools/benchmark/ws/websocket.py -R
# taosdump 43 cases
,,y,army,./pytest.sh python3 ./test.py -f tools/taosdump/native/taosdumpCompa.py
,,y,army,./pytest.sh python3 ./test.py -f tools/taosdump/native/taosdumpTest.py
,,y,army,./pytest.sh python3 ./test.py -f tools/taosdump/native/taosdumpDbStb.py
,,y,army,./pytest.sh python3 ./test.py -f tools/taosdump/native/taosdumpTestTypeDouble.py

View File

@ -1,84 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Generated from CLion C/C++ Code Style settings
BasedOnStyle: LLVM
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignOperands: false
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: Always
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Always
AllowShortLambdasOnASingleLine: All
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterReturnType: None
AlwaysBreakTemplateDeclarations: Yes
BreakBeforeBraces: Custom
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: false
SplitEmptyRecord: true
BreakBeforeBinaryOperators: NonAssignment
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
BreakInheritanceList: BeforeColon
ColumnLimit: 0
CompactNamespaces: false
ContinuationIndentWidth: 4
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 4
KeepEmptyLinesAtTheStartOfBlocks: true
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PointerAlignment: Right
ReflowComments: false
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
TabWidth: 4
UseTab: Never

View File

@ -1,10 +0,0 @@
build/
build.mac/
doc/
test.avro
test6.df
test8.df
test9.df
test_skip.df
test_lastSync.df
test_readRecordUsingLastSync.df

View File

@ -1,4 +0,0 @@
See https://avro.apache.org/ for a list of authors

View File

@ -1,218 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
cmake_minimum_required (VERSION 3.1)
set (CMAKE_LEGACY_CYGWIN_WIN32 0)
if (NOT DEFINED CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 11)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED ON)
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.0)
# Enable MACOSX_RPATH by default
cmake_policy (SET CMP0042 NEW)
endif()
if (NOT DEFINED CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS)
set (CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
endif()
# Read the Avro version string: prefer a VERSION.txt next to this file and
# fall back to ../../share/VERSION.txt otherwise.
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/VERSION.txt)
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/VERSION.txt" AVRO_VERSION)
else (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/VERSION.txt)
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/../../share/VERSION.txt"
AVRO_VERSION)
endif (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/VERSION.txt)
# Strip newlines, then turn "a.b.c" into the CMake list "a;b;c" so the
# components can be picked out by index.
string(REPLACE "\n" "" AVRO_VERSION ${AVRO_VERSION})
string(REPLACE "." ";" AVRO_VERSION ${AVRO_VERSION})
# Elements 0, 1 and 2 become the major, minor and patch numbers.
# NOTE(review): list(GET) fails if the file has fewer than three
# dot-separated components - confirm VERSION.txt always has that shape.
list(GET AVRO_VERSION 0 AVRO_VERSION_MAJOR)
list(GET AVRO_VERSION 1 AVRO_VERSION_MINOR)
list(GET AVRO_VERSION 2 AVRO_VERSION_PATCH)
project (Avro-cpp)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR})
if (WIN32 AND NOT CYGWIN AND NOT MSYS)
add_definitions (/EHa)
add_definitions (
-DNOMINMAX
-DBOOST_REGEX_DYN_LINK
-DBOOST_FILESYSTEM_DYN_LINK
-DBOOST_SYSTEM_DYN_LINK
-DBOOST_IOSTREAMS_DYN_LINK
-DBOOST_PROGRAM_OPTIONS_DYN_LINK
-DBOOST_ALL_NO_LIB)
endif()
if (CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
if (AVRO_ADD_PROTECTOR_FLAGS)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fstack-protector-all -D_GLIBCXX_DEBUG")
# Unset _GLIBCXX_DEBUG for avrogencpp.cc because using Boost Program Options
# leads to linking errors when compiling with _GLIBCXX_DEBUG as described on
# https://stackoverflow.com/questions/19729036/
set_source_files_properties(impl/avrogencpp.cc PROPERTIES COMPILE_FLAGS "-U_GLIBCXX_DEBUG")
endif ()
endif ()
find_package (Boost 1.38 REQUIRED
COMPONENTS filesystem iostreams program_options regex system)
# Snappy is optional: when it is found the SNAPPY_CODEC_AVAILABLE define is
# added; otherwise the Snappy variables are set to empty strings so the
# later target_link_libraries / target_include_directories calls that
# reference them still expand cleanly.
find_package(Snappy)
if (SNAPPY_FOUND)
set(SNAPPY_PKG libsnappy)
add_definitions(-DSNAPPY_CODEC_AVAILABLE)
message("Enabled snappy codec")
else (SNAPPY_FOUND)
set(SNAPPY_PKG "")
set(SNAPPY_LIBRARIES "")
set(SNAPPY_INCLUDE_DIR "")
message("Disabled snappy codec. libsnappy not found.")
endif (SNAPPY_FOUND)
add_definitions (${Boost_LIB_DIAGNOSTIC_DEFINITIONS})
include_directories (api ${CMAKE_CURRENT_BINARY_DIR} ${Boost_INCLUDE_DIRS})
set (AVRO_SOURCE_FILES
impl/Compiler.cc impl/Node.cc impl/LogicalType.cc
impl/NodeImpl.cc impl/ResolverSchema.cc impl/Schema.cc
impl/Types.cc impl/ValidSchema.cc impl/Zigzag.cc
impl/BinaryEncoder.cc impl/BinaryDecoder.cc
impl/Stream.cc impl/FileStream.cc
impl/Generic.cc impl/GenericDatum.cc
impl/DataFile.cc
impl/parsing/Symbol.cc
impl/parsing/ValidatingCodec.cc
impl/parsing/JsonCodec.cc
impl/parsing/ResolvingDecoder.cc
impl/json/JsonIO.cc
impl/json/JsonDom.cc
impl/Resolver.cc impl/Validator.cc
)
add_library (avrocpp SHARED ${AVRO_SOURCE_FILES})
set_property (TARGET avrocpp
APPEND PROPERTY COMPILE_DEFINITIONS AVRO_DYN_LINK)
add_library (avrocpp_s STATIC ${AVRO_SOURCE_FILES})
target_include_directories(avrocpp_s PRIVATE ${SNAPPY_INCLUDE_DIR})
set_property (TARGET avrocpp avrocpp_s
APPEND PROPERTY COMPILE_DEFINITIONS AVRO_SOURCE)
set_target_properties (avrocpp PROPERTIES
VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH})
set_target_properties (avrocpp_s PROPERTIES
VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH})
target_link_libraries (avrocpp ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES})
target_include_directories(avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
add_executable (precompile test/precompile.cc)
target_link_libraries (precompile avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES})
# gen(<file> <ns>): generate the header <file>.hh from the schema
# jsonschemas/<file> by running avrogencpp, and expose it as the custom
# target <file>_hh so other targets can depend on the generated header.
#   file - schema file name under jsonschemas/; also the stem of the output
#   ns   - value passed to avrogencpp's -n option (presumably the C++
#          namespace of the generated code - confirm against avrogencpp)
# The command depends on both the avrogencpp target and the schema file, so
# the header is regenerated when either changes.
macro (gen file ns)
add_custom_command (OUTPUT ${file}.hh
COMMAND avrogencpp
-p -
-i ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file}
-o ${file}.hh -n ${ns} -U
DEPENDS avrogencpp ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file})
add_custom_target (${file}_hh DEPENDS ${file}.hh)
endmacro (gen)
gen (empty_record empty)
gen (bigrecord testgen)
gen (bigrecord_r testgen_r)
gen (bigrecord2 testgen2)
gen (tweet testgen3)
gen (union_array_union uau)
gen (union_map_union umu)
gen (union_conflict uc)
gen (recursive rec)
gen (reuse ru)
gen (circulardep cd)
gen (tree1 tr1)
gen (tree2 tr2)
gen (crossref cr)
gen (primitivetypes pt)
gen (cpp_reserved_words cppres)
add_executable (avrogencpp impl/avrogencpp.cc)
target_link_libraries (avrogencpp avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES})
enable_testing()
# unittest(<name>): build test/<name>.cc into the executable <name>, link it
# against the shared avrocpp library plus Boost and Snappy, and register it
# as a CTest test of the same name. The test runs with the source directory
# as its working directory.
macro (unittest name)
add_executable (${name} test/${name}.cc)
target_link_libraries (${name} avrocpp ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES})
add_test (NAME ${name} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${name})
endmacro (unittest)
unittest (buffertest)
unittest (unittest)
unittest (SchemaTests)
unittest (LargeSchemaTests)
unittest (CodecTests)
unittest (StreamTests)
unittest (SpecificTests)
unittest (DataFileTests)
unittest (JsonTests)
unittest (AvrogencppTests)
unittest (CompilerTests)
unittest (AvrogencppTestReservedWords)
add_dependencies (AvrogencppTestReservedWords cpp_reserved_words_hh)
add_dependencies (AvrogencppTests bigrecord_hh bigrecord_r_hh bigrecord2_hh
tweet_hh
union_array_union_hh union_map_union_hh union_conflict_hh
recursive_hh reuse_hh circulardep_hh tree1_hh tree2_hh crossref_hh
primitivetypes_hh empty_record_hh)
include (InstallRequiredSystemLibraries)
set (CPACK_PACKAGE_FILE_NAME "avrocpp-${AVRO_VERSION_MAJOR}")
include (CPack)
install (TARGETS avrocpp avrocpp_s
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib
RUNTIME DESTINATION lib)
install (TARGETS avrogencpp RUNTIME DESTINATION bin)
install (DIRECTORY api/ DESTINATION include/avro
FILES_MATCHING PATTERN *.hh)
if (NOT CMAKE_BUILD_TYPE)
set (CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
FORCE)
endif (NOT CMAKE_BUILD_TYPE)

View File

@ -1 +0,0 @@
Refer to CHANGES.txt in the root of avro repository for change log

View File

@ -1,31 +0,0 @@
##
## Licensed to the Apache Software Foundation (ASF) under one
## or more contributor license agreements. See the NOTICE file
## distributed with this work for additional information
## regarding copyright ownership. The ASF licenses this file
## to you under the Apache License, Version 2.0 (the
## "License"); you may not use this file except in compliance
## with the License. You may obtain a copy of the License at
##
## https://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing,
## software distributed under the License is distributed on an
## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
## KIND, either express or implied. See the License for the
## specific language governing permissions and limitations
## under the License.
##
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "Avro C++"
JAVADOC_AUTOBRIEF = YES
INPUT = MainPage.dox \
api
INPUT_ENCODING = UTF-8
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
ALPHABETICAL_INDEX = NO
GENERATE_LATEX = NO
HAVE_DOT = NO
OUTPUT_DIRECTORY = doc
EXAMPLE_PATH = examples

View File

@ -1,54 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Tries to find Snappy headers and libraries.
#
# Usage of this module as follows:
#
# find_package(Snappy)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# SNAPPY_ROOT_DIR Set this variable to the root installation of
# Snappy if the module has problems finding
# the proper installation path.
#
# Variables defined by this module:
#
# SNAPPY_FOUND System has Snappy libs/headers
# SNAPPY_LIBRARIES The Snappy libraries
# SNAPPY_INCLUDE_DIR The location of Snappy headers
# Locate the header directory; SNAPPY_ROOT_DIR/include is given as a hint.
find_path(SNAPPY_INCLUDE_DIR
NAMES snappy.h
HINTS ${SNAPPY_ROOT_DIR}/include)
# Locate the library; SNAPPY_ROOT_DIR/lib is given as a hint.
find_library(SNAPPY_LIBRARIES
NAMES snappy
HINTS ${SNAPPY_ROOT_DIR}/lib)
# Let the standard helper derive the found flag from the two variables
# above and print the usual found / not-found message.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Snappy DEFAULT_MSG
SNAPPY_LIBRARIES
SNAPPY_INCLUDE_DIR)
# Mark these cache entries as advanced.
mark_as_advanced(
SNAPPY_ROOT_DIR
SNAPPY_LIBRARIES
SNAPPY_INCLUDE_DIR)

View File

@ -1,261 +0,0 @@
Apache License
Version 2.0, January 2004
https://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
----------------------------------------------------------------------
License for the m4 macros used by the C++ implementation:
Files:
* lang/c++/m4/m4_ax_boost_system.m4
Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
Copyright (c) 2008 Michael Tindal
Copyright (c) 2008 Daniel Casimiro <dan.casimiro@gmail.com>
* lang/c++/m4/m4_ax_boost_asio.m4
Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
Copyright (c) 2008 Pete Greenwell <pete@mu.org>
* lang/c++/m4/m4_ax_boost_filesystem.m4
Copyright (c) 2009 Thomas Porschberg <thomas@randspringer.de>
Copyright (c) 2009 Michael Tindal
Copyright (c) 2009 Roman Rybalko <libtorrent@romanr.info>
* lang/c++/m4/m4_ax_boost_thread.m4
Copyright (c) 2009 Thomas Porschberg <thomas@randspringer.de>
Copyright (c) 2009 Michael Tindal
* lang/c++/m4/m4_ax_boost_regex.m4
Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
Copyright (c) 2008 Michael Tindal
* lang/c++/m4/m4_ax_boost_base.m4
Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
License text:
| Copying and distribution of this file, with or without modification, are
| permitted in any medium without royalty provided the copyright notice
| and this notice are preserved. This file is offered as-is, without any
| warranty.
----------------------------------------------------------------------
License for the AVRO_BOOT_NO_TRAIT code in the C++ implementation:
File: lang/c++/api/Boost.hh
| Boost Software License - Version 1.0 - August 17th, 2003
|
| Permission is hereby granted, free of charge, to any person or organization
| obtaining a copy of the software and accompanying documentation covered by
| this license (the "Software") to use, reproduce, display, distribute,
| execute, and transmit the Software, and to prepare derivative works of the
| Software, and to permit third-parties to whom the Software is furnished to
| do so, all subject to the following:
|
| The copyright notices in the Software and this entire statement, including
| the above license grant, this restriction and the following disclaimer,
| must be included in all copies of the Software, in whole or in part, and
| all derivative works of the Software, unless such copies or derivative
| works are solely in the form of machine-executable object code generated by
| a source language processor.
|
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
| FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
| SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
| FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
| ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
| DEALINGS IN THE SOFTWARE.

View File

@ -1,33 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Visual Studio 2019 Build Instructions
## Prerequisites
* Microsoft Visual Studio 2019.
* CMake >= 3.12 (should be supplied as part of VS2019 installation).
* Clone [https://github.com/spektom/snappy-visual-cpp](https://github.com/spektom/snappy-visual-cpp), and follow build instructions in `README.md`.
* Install Boost from [https://netcologne.dl.sourceforge.net/project/boost/boost-binaries/1.68.0/boost_1_68_0-msvc-14.1-64.exe](https://netcologne.dl.sourceforge.net/project/boost/boost-binaries/1.68.0/boost_1_68_0-msvc-14.1-64.exe).
* Add `C:\<path to>\boost_1_68_0\lib64-msvc-14.1` to PATH environment variable.
## Building
cd lang\c++
cmake -G "Visual Studio 16 2019" -DBOOST_ROOT=C:\<path to>\boost_1_68_0 -DBOOST_INCLUDEDIR=c:\<path to>\boost_1_68_0\boost -DBOOST_LIBRARYDIR=c:\<path to>\boost_1_68_0\lib64-msvc-14.1 -DSNAPPY_INCLUDE_DIR=C:\<path to>\snappy-visual-cpp -DSNAPPY_LIBRARIES=C:\<path to>\snappy-visual-cpp\x64\Release\snappy.lib ..
msbuild Avro-cpp.sln /p:Configuration=Release /p:Platform=x64

View File

@ -1,339 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*!
\mainpage
<h2 id="Introduction">Introduction to Avro C++ <a href="#Introduction">§</a></h2>
<p>Avro is a data serialization system. See
<a href="https://avro.apache.org/docs/current/">https://avro.apache.org/docs/current/</a>
for background information.</p>
<p>Avro C++ is a C++ library which implements parts of the <a href="https://avro.apache.org/docs/current/spec.html"> Avro Specification</a>. The library includes the following functionality:</p>
<ul>
<li>Assembling schemas programmatically.
<li>A schema parser, which can parse Avro schema (written in JSON) into a Schema object.
<li>Encoders and decoders to encode data into Avro format and decode it back using primitive functions. There are multiple implementations of encoders and decoders.
<ul>
<li>A binary encoder, which encodes into binary Avro data.
<li>A JSON encoder, which encodes into JSON Avro data.
<li>A validating encoder, an encoder proxy, which validates the call sequence to the encoder before sending the calls to another encoder.
<li>A binary decoder, which decodes binary Avro data.
<li>A JSON decoder, which decodes JSON Avro data.
<li>A validating decoder, a decoder proxy, which validates the call sequence to the decoder before sending the calls to another decoder.
<li>A resolving decoder, which accepts calls according to a reader's schema but decodes data corresponding to a different (writer's) schema, doing schema resolution according to the resolution rules in the Avro specification.
</ul>
<li>Streams for storing and reading data, which Encoders and Decoders use.
<li>Support for Avro DataFile.
<li>A code generator, which generates C++ classes and functions to encode and decode them. The code generator produces a C++ header file from a given schema file.
</ul>
Presently there is no support for the following specified in Avro specification.
<ul>
<li>Avro RPC
</ul>
<b>Note:</b> Prior to Avro release 1.5, some of the functionality mentioned above was available through a somewhat different API and set of tools. They are partially incompatible with the present ones. They continue to be available but will be deprecated and discontinued sometime in the future. The documentation on that API can be found at <a href="https://avro.apache.org/docs/1.4.0/api/cpp/html/index.html">https://avro.apache.org/docs/1.4.0/api/cpp/html/index.html</a>
<h2 id="Installing">Installing Avro C++ <a href="#Installing">§</a></h2>
<h3>Supported platforms and pre-requisites</h3>
One should be able to build Avro C++ on (1) any UNIX flavor including cygwin for Windows and (2) natively on Windows using Visual Studio. We have tested it on (1) Linux systems (Ubuntu and RHEL) and Cygwin, and (2) Visual Studio 2010 Express edition.
In order to build Avro C++, one needs the following:
<ul>
<li>A C++ compiler and runtime libraries.
<li>Boost library version 1.38 or later. Apart from the header-only libraries of Boost, Avro C++ requires filesystem, iostreams, system and program_options libraries. Please see <a href="https://www.boost.org/">https://www.boost.org</a> or your platform's documentation for details on how to set up Boost for your platform.
<li>CMake build tool version 2.6 or later. Please see <a href="https://www.cmake.org">https://www.cmake.org</a> or your platform's documentation for details on how to set up CMake for your system.
<li>Python. If not already present, please consult your platform-specific documentation on how to install Python on your system.
</ul>
For Ubuntu Linux, for example, you can have these by doing
<tt>apt-get install</tt> for the following packages:
\ul
\li cmake
\li g++
\li libboost-dev
\li libboost-filesystem-dev
\li libboost-iostreams-dev
\li libboost-program-options-dev
\li libboost-system-dev
For Windows native builds, you need to install the following:
\ul
\li cmake
\li boost distribution from Boost consulting
\li Visual studio
<h3>Installing Avro C++</h3>
<ol>
<li>Download the latest Avro distribution. Avro distribution is a compressed tarball.
Please see the main documentation if you want to build anything more than Avro C++.
</ol>
<h4>On Unix systems and on Cygwin</h4>
<ol>
<li>Expand the tarball into a directory.
<li>Change to <tt>lang/c++</tt> subdirectory.
<li>Type <tt>./build.sh test</tt>. This builds Avro C++ and runs tests on it.
<li>Type <tt>./build.sh install</tt>. This installs Avro C++ under /usr/local on your system.
</ol>
<h4>On native Windows</h4>
<ol>
<li>Ensure that CMake's bin directory and Boost's lib directory are in the path.
<li>Expand the tarball into a directory.
<li>Change to <tt>lang/c++</tt> subdirectory.
<li>Create a subdirectory, say, build.win, and change to that directory.
<li>Type <tt>cmake -G "Visual Studio 10"</tt>. It creates, among other things, Avro-cpp.sln file.
<li>Open the solution file using Visual Studio and build the projects from within the Visual Studio.
<li>To run all unit tests, build the special project named "RUN_TESTS".
<li>After building all the projects, you can also execute the unit tests from command line. <tt>ctest -C release</tt> or <tt>ctest -C debug</tt>.
</ol>
<h2 id="GettingStarted">Getting started with Avro C++ <a href="#GettingStarted">§</a></h2>
<p>Although Avro does not require use of code generation, that is the easiest
way to get started with the Avro C++ library.
The code generator reads a schema, and generates a C++
header file that defines one or more C++ <tt>struct</tt>s to represent
the data for the schema and functions to encode and decode those
<tt>struct</tt>s. Even if you wish to write custom code to encode and decode
your objects using the core functionality of Avro C++, the generated code
can serve as an example of how to use the core functionality.
<p>
Let's walk through an example, using a simple schema. Use the
schema that represents a complex number:</p>
<b>File: cpx.json</b>
\includelineno cpx.json
<p>
<b>Note:</b> All the example code given here can be found under
<tt>examples</tt> directory of the distribution.
<p>
Assume this JSON representation of the schema is stored in a file
called <tt>cpx.json</tt>. To generate the code, issue the command:
<pre>
avrogencpp -i cpx.json -o cpx.hh -n c
</pre>
The <tt>-i</tt> flag specifies the input schema file and <tt>-o</tt> flag
specifies the output header file to generate. The generated C++ code will be
in the namespace specified with <tt>-n</tt> flag.
<p>
The generated file, among other things will have the following:
<pre>
...
namespace c {
...
struct cpx {
double re;
double im;
};
...
}
</pre>
<tt>cpx</tt> is a C++ representation of the Avro schema <tt>cpx</tt>.
Now let's see how we can use the code generated to encode data into avro and decode it back.
<b>File: generated.cc</b>
\includelineno generated.cc
In line 27, we construct a memory output stream. By this we indicate that we
want to send the encoded Avro data into memory. In line 28, we construct a
binary encoder, whereby we mean the output should be encoded using the Avro
binary standard. In line 29, we attach the output stream to the encoder. At any given time an encoder can write to only one output stream.
<p>
In line 32, we write the contents of c1 into the output stream using the
encoder. Now the output stream contains the binary representation of
the object. The rest of the code verifies that the data is indeed in the stream.
<p>
In line 35, we construct a memory input stream from the contents of the
output stream. Thus the input stream has the binary representation of the
object. In line 36 and 37, we construct a binary decoder and attach the
input stream to it. Line 40 decodes the contents of the stream into another
object c2. Now c1 and c2 should have identical contents, which one can readily
verify from the output of the program, which should be:
<pre>
(1, 2.13)
</pre>
Now, if you want to encode the data using Avro JSON encoding, you should use
avro::jsonEncoder() instead of avro::binaryEncoder() in line 28
and avro::jsonDecoder() instead of avro::binaryDecoder() in line 36.
<p>
On the other hand, if you want to write the contents to a file instead of
memory, you should use avro::fileOutputStream() instead of
avro::memoryOutputStream() in line 27 and avro::fileInputStream()
instead of avro::memoryInputStream() in line 35.
<p>
<h2 id="ReadingJsonSchema"><a id="ReadingJsonSchema">Reading a JSON schema </a><a href="index.html#ReadingJsonSchema">§</a></h2>
<p>The section above demonstrated pretty much all that's needed to
know to get started reading and writing objects using the Avro C++
code generator. The following sections will cover some more
information.</p>
<p>The library provides some utilities to read a schema that is
stored in a JSON file:</p>
<b>File: schemaload.cc</b>
\includelineno schemaload.cc
<p>
This reads the file, and parses the JSON schema into an in-memory schema
object of type avro::ValidSchema. If, for some reason, the schema is not valid,
the <tt>cpxSchema</tt> object will not be set, and an exception will be
thrown.
</p>
If you always use the Avro code generator you don't really need the in-memory
schema objects. But if you use custom objects and routines to encode or decode
avro data, you will need the schema objects. Other uses of schema objects
are generic data objects and schema resolution described in the following
sections.
<h2 id="CustomEncodingDecoding">Custom encoding and decoding <a href="#CustomEncodingDecoding">§</a></h2>
Suppose you want to encode objects of type std::complex<double> from
C++ standard library using the schema defined in cpx.json.
Since std::complex<double> was not generated by Avro, it doesn't know how to encode or decode objects of that
type. You have to tell Avro how to do that.
The recommended way to tell Avro how to encode or decode is to specialize
Avro's codec_traits template. For std::complex<double>, here is what you'd do:
<b>File: custom.cc</b>
\includelineno custom.cc
Please notice that the main function is pretty much similar to that we used
for the generated class. Once <tt>codec_traits</tt> for a specific type is
supplied, you do not really need to do anything special for your custom types.
<p>
But wait, how does Avro know that complex<double> represents the data for
the schema in <tt>cpx.json</tt>? It doesn't. In fact, if you had used
<tt>std::complex<float></tt> instead of <tt>std::complex<double></tt>, the program
would still have worked. But the data in memory would not have corresponded
to the schema in <tt>cpx.json</tt>.
<p>
In order to ensure that you indeed use the correct type, you can use
the validating encoder and decoder. Here is how:
<b>File: validating.cc</b>
\includelineno validating.cc
Here, instead of using the plain binary encoder, you use a validating encoder
backed by a binary encoder. Similarly, instead of using the plain binary
decoder, you use a validating decoder backed by a binary decoder. Now,
if you use <tt>std::complex<float></tt> instead of <tt>std::complex<double></tt>
the validating encoder and decoder will throw an exception stating that
you are trying to encode or decode <tt>float</tt> instead of <tt>double</tt>.
<p>
You can use any encoder behind the validating encoder and any decoder
behind the validating decoder. But in practice, only the binary encoder
and the binary decoder have no knowledge of the underlying schema.
All other encoders (JSON encoder) and decoders (JSON decoder,
resolving decoder) do know about the schema and they validate internally. So,
fronting them with a validating encoder or validating decoder is wasteful.
<h2 id="GenericDataObjects">Generic data objects <a href="#GenericDataObjects">§</a></h2>
A third way to encode and decode data is to use Avro's generic datum.
Avro's generic datum allows you to read any arbitrary data corresponding to
an arbitrary schema into a generic object. One need not know anything
about the schema or data at compile time.
Here is an example of how one can use the generic datum.
<b>File: generic.cc</b>
\includelineno generic.cc
In this example, we encode the data using generated code and decode it with
generic datum. Then we examine the contents of the generic datum and extract
them. Please see \ref avro::GenericDatum for more details on how to use it.
<h2 id="ReadingDifferentSchema">Reading data with a schema different from that of the writer <a href="#ReadingDifferentSchema">§</a></h2>
It is possible to read the data written according to one schema
using a different schema, provided the reader's schema and the writer's
schema are compatible according to the Avro's Schema resolution rules.
<p>
For example, you have a reader which is interested only in the imaginary part
of a complex number while the writer writes both the real and imaginary parts.
It is possible to do automatic schema resolution between the writer's schema
and the reader's schema as shown below.
<b>File: imaginary.json</b>
\includelineno imaginary.json
<pre>
avrogencpp -i imaginary.json -o imaginary.hh -n i
</pre>
<b>File: resolving.cc</b>
\includelineno resolving.cc
In this example, writer and reader deal with different schemas,
both have a record with the name 'cpx'. The writer schema has two fields and
the reader's has just one. We generated code for writer's schema in a namespace
<tt>c</tt> and the reader's in <tt>i</tt>.
<p>
Please notice how the reading part of the example at line 60 reads as if
the stream contains the data corresponding to its schema. The schema resolution
is automatically done by the resolving decoder.
<p>
In this example, we have used a simple (somewhat artificial) projection (where the set of fields in
the reader's schema is a subset of the set of fields in the writer's). But more
complex resolutions are allowed by Avro specification.
<h2 id="UsingAvroDataFiles">Using Avro data files <a href="#UsingAvroDataFiles">§</a></h2>
Avro specification specifies a format for data files. Avro C++ implements
the specification. The code below demonstrates how one can use the
Avro data file to store and retrieve a collection of objects
corresponding to a given schema.
<b>File: datafile.cc</b>
\includelineno datafile.cc
Please see DataFile.hh for more details.
*/

View File

@ -1,5 +0,0 @@
For news, visit the Avro web site at
https://avro.apache.org/

View File

@ -1,6 +0,0 @@
Apache Avro
Copyright 2010-2015 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (https://www.apache.org/).

View File

@ -1,69 +0,0 @@
Avro C++ README.txt
The C++ port is thus far incomplete. Currently, it contains:
- Serializer/Parser- objects for writing/reading raw binary.
- xxxSchema- objects for composing schemas.
- ValidSchema- a schema object that has been converted to a parse tree
(with some sanity checks).
- ValidSchema.toJson() writes the schema as a json object.
- ValidatingSerializer/ValidatingParser- check that reads/writes
match the expected schema type (more expensive than the raw
serializer/parser but they detect errors, and allow dynamic
discovery of parsed data/attributes).
- Compiler (compileJsonSchema())- converts a Json string schema to a
ValidSchema.
- Code Generation (experimental) - given a schema it generates C++
objects of the same data types, and the code to serialize and parse
it.
What's missing: Rpc containers are not yet implemented. Documentation is sparse.
INSTRUCTIONS
Pre-requisites:
To compile requires boost headers, and the boost regex library. Optionally, it requires Snappy compression library. If Snappy is available, it builds support for Snappy compression and skips it otherwise. (Please see your OS-specific instructions on how to install Boost and Snappy for your OS).
To build one requires cmake 2.6 or later.
To generate a Makefile under Unix, MacOS (using GNU) or Cygwin use:
mkdir build
cd build
cmake -G "Unix Makefiles" ..
If it doesn't work, either you are missing some packages (boost, flex or bison),
or you need to help configure locate them.
If the Makefile is configured correctly, then you can make and run tests:
make
ctest
To install
make package
and then untar the generated .tar.gz file.
To build and test on MacOS (using Xcode)
mkdir build.mac
cd build.mac
cmake -G Xcode
xcodebuild -configuration Release
ctest -C Release
If debug version is required, replace 'Release' above with 'Debug'.
Note: The LICENSE and NOTICE files in the lang/c++ source directory are used to
build the binary distribution. The LICENSE and NOTICE information for the Avro
C++ source distribution is in the root directory.

View File

@ -1,80 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_AvroParse_hh__
#define avro_AvroParse_hh__
#include "AvroTraits.hh"
#include "Config.hh"
#include "ResolvingReader.hh"
/// \file
///
/// Standalone parse functions for Avro types.
namespace avro {
/// The main parse entry point function. Takes a parser (either validating or
/// plain) and the object that should receive the parsed data.
///
/// The call is forwarded to the three-argument parse() overloads below; the
/// is_serializable<T> tag object picks the std::true_type or std::false_type
/// version at compile time.
template<typename Reader, typename T>
void parse(Reader &p, T &val) {
parse(p, val, is_serializable<T>());
}
/// Entry point for a ResolvingReader. Uses the same tag dispatch as the
/// generic entry point, but routes to translatingParse() rather than parse().
template<typename T>
void parse(ResolvingReader &p, T &val) {
translatingParse(p, val, is_serializable<T>());
}
/// Selected when the is_serializable<T> tag is std::false_type, i.e. T has not
/// been registered as serializable. The static_assert condition depends on T,
/// so it is only checked when this overload is instantiated, which turns the
/// unsupported call into a compile error.
template<typename Reader, typename T>
void parse(Reader &p, T &val, const std::false_type &) {
static_assert(sizeof(T) == 0, "Not a valid type to parse");
}
/// Same compile-time rejection for the translatingParse() path. Note that this
/// overload is templated on Reader, unlike the std::true_type version below,
/// which is fixed to ResolvingReader.
template<typename Reader, typename T>
void translatingParse(Reader &p, T &val, const std::false_type &) {
static_assert(sizeof(T) == 0, "Not a valid type to parse");
}
// @{
// The remainder of the file includes default implementations for serializable types.

/// Generic serializable value: hand it to the reader's readValue().
template<typename Reader, typename T>
void parse(Reader &p, T &val, const std::true_type &) {
p.readValue(val);
}
/// Byte vectors go through the reader's readBytes() instead of readValue().
template<typename Reader>
void parse(Reader &p, std::vector<uint8_t> &val, const std::true_type &) {
p.readBytes(val);
}
/// ResolvingReader path for a serializable value: delegates to the reader's
/// own parse() member.
template<typename T>
void translatingParse(ResolvingReader &p, T &val, const std::true_type &) {
p.parse(val);
}
// @}
} // namespace avro
#endif

View File

@ -1,64 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_AvroSerialize_hh__
#define avro_AvroSerialize_hh__
#include "AvroTraits.hh"
#include "Config.hh"
/// \file
///
/// Standalone serialize functions for Avro types.
namespace avro {
/// The main serializer entry point function. Takes a serializer (either validating or
/// plain) and the object that should be serialized.
///
/// The call is forwarded to a three-argument serialize() overload; the
/// is_serializable<T> tag object selects the std::true_type or
/// std::false_type version at compile time.
template<typename Writer, typename T>
void serialize(Writer &s, const T &val) {
serialize(s, val, is_serializable<T>());
}
/// Selected when the is_serializable<T> tag is std::false_type, i.e. T has not
/// been registered as serializable. The static_assert condition depends on T,
/// so it is only checked when this overload is instantiated, which forces the
/// compiler to reject the unsupported call.
template<typename Writer, typename T>
void serialize(Writer &s, const T &val, const std::false_type &) {
static_assert(sizeof(T) == 0, "Not a valid type to serialize");
}
/// The remainder of the file includes default implementations for serializable types.
// @{
/// Default implementation for a serializable value: forwards it to the
/// writer's writeValue().
///
/// The value is taken by const reference so that non-trivial types such as
/// std::string are not copied on their way to the writer. The two-argument
/// entry point already holds the value as `const T &`, so nothing is gained
/// by copying here.
///
/// \param s   Writer that receives the value; must provide writeValue().
/// \param val Value to write.
template<typename Writer, typename T>
void serialize(Writer &s, const T &val, const std::true_type &) {
    s.writeValue(val);
}
/// Byte-vector implementation: passes the vector's storage pointer and its
/// element count to the writer's writeBytes().
template<typename Writer>
void serialize(Writer &s, const std::vector<uint8_t> &val, const std::true_type &) {
    const auto *const bytes = val.data();
    const auto count = val.size();
    s.writeBytes(bytes, count);
}
// @}
} // namespace avro
#endif

View File

@ -1,119 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_AvroTraits_hh__
#define avro_AvroTraits_hh__
#include "Config.hh"
#include "Types.hh"
#include <cstdint>
#include <type_traits>
/** @file
*
* This header contains type traits and similar utilities used by the library.
*/
namespace avro {
/**
* Define an is_serializable trait for types we can serialize natively.
* New types will need to define the trait as well.
*
* The primary template derives from std::false_type, so a type is treated as
* not serializable until a specialization says otherwise.
*/
template<typename T>
struct is_serializable : public std::false_type {};
/**
* Trait marking types as promotable. The primary template is std::false_type;
* only explicit specializations opt a type in.
* NOTE(review): presumably "promotable" refers to Avro schema-resolution
* promotions (e.g. int to long) - confirm against the resolution code.
*/
template<typename T>
struct is_promotable : public std::false_type {};
/**
* Maps a C++ type to its Avro Type enumerator. The primary template yields
* AVRO_NUM_TYPES, the value used for types that have no specialization.
*/
template<typename T>
struct type_to_avro {
static const Type type = AVRO_NUM_TYPES;
};
/**
 * Check if \p T is a complete type, i.e. it is defined as opposed to just
 * declared.
 *
 * is_defined<T>::value is true when T is a complete type and false otherwise.
 *
 * Detection relies on SFINAE: sizeof(U) is ill-formed for an incomplete U,
 * which discards the first test() overload and leaves only the variadic
 * fallback. Both overloads are used solely as the operand of sizeof (an
 * unevaluated context), so they are declared but intentionally never defined.
 *
 * Note that the result is fixed when is_defined<T> is first instantiated;
 * completing T later in the translation unit does not change it.
 */
template<class T>
struct is_defined {
    typedef char yes[1];
    typedef char no[2];

    // Viable only when sizeof(U) is well-formed, i.e. U is complete.
    template<class U>
    static yes &test(char (*)[sizeof(U)]);

    // Fallback chosen when the overload above is removed by SFINAE.
    template<class U>
    static no &test(...);

    static const bool value = sizeof(test<T>(0)) == sizeof(yes);
};
/**
 * Similar to is_defined, but used to check if T is not defined.
 *
 * is_not_defined<T>::value is true when T is an incomplete type and false
 * otherwise.
 *
 * Detection relies on SFINAE: sizeof(U) is ill-formed for an incomplete U,
 * which discards the first test() overload and leaves only the variadic
 * fallback. Both overloads are used solely as the operand of sizeof (an
 * unevaluated context), so they are declared but intentionally never defined.
 *
 * Note that the result is fixed when is_not_defined<T> is first instantiated;
 * completing T later in the translation unit does not change it.
 */
template<class T>
struct is_not_defined {
    typedef char yes[1];
    typedef char no[2];

    // Viable only when sizeof(U) is well-formed, i.e. U is complete.
    template<class U>
    static yes &test(char (*)[sizeof(U)]);

    // Fallback chosen when the overload above is removed by SFINAE.
    template<class U>
    static no &test(...);

    static const bool value = sizeof(test<T>(0)) == sizeof(no);
};
// Registers CTYPE as natively serializable: specializes is_serializable to
// std::true_type and type_to_avro so that its `type` member is AVROTYPE.
// Must be expanded at a scope where both primary templates are visible.
#define DEFINE_PRIMITIVE(CTYPE, AVROTYPE) \
template<> \
struct is_serializable<CTYPE> : public std::true_type {}; \
\
template<> \
struct type_to_avro<CTYPE> { \
static const Type type = AVROTYPE; \
};
// Same as DEFINE_PRIMITIVE, and additionally specializes is_promotable to
// std::true_type for CTYPE.
#define DEFINE_PROMOTABLE_PRIMITIVE(CTYPE, AVROTYPE) \
template<> \
struct is_promotable<CTYPE> : public std::true_type {}; \
\
DEFINE_PRIMITIVE(CTYPE, AVROTYPE)
// Registrations of the natively supported C++ types. int32_t, int64_t and
// float are also marked promotable; the remaining types (including double)
// are registered with DEFINE_PRIMITIVE only, so is_promotable stays false
// for them.
DEFINE_PROMOTABLE_PRIMITIVE(int32_t, AVRO_INT)
DEFINE_PROMOTABLE_PRIMITIVE(int64_t, AVRO_LONG)
DEFINE_PROMOTABLE_PRIMITIVE(float, AVRO_FLOAT)
DEFINE_PRIMITIVE(double, AVRO_DOUBLE)
DEFINE_PRIMITIVE(bool, AVRO_BOOL)
DEFINE_PRIMITIVE(Null, AVRO_NULL)
DEFINE_PRIMITIVE(std::string, AVRO_STRING)
DEFINE_PRIMITIVE(std::vector<uint8_t>, AVRO_BYTES)
} // namespace avro
#endif

View File

@ -1,63 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Compiler_hh__
#define avro_Compiler_hh__
#include "Config.hh"
#include <cstdint>
#include <istream>
namespace avro {
// Forward declaration; the stream type is only used by reference here.
class AVRO_DECL InputStream;
// Forward declaration of the schema type that the functions below fill in or
// return.
// NOTE(review): the comment previously attached here described a helper class
// for a flex/bison based parser (per-parse lexer object, parse-tree building).
// No such class is declared in this header, so that text looks stale - confirm
// before relying on it.
class AVRO_DECL ValidSchema;
/// Given a stream containing a JSON schema, compiles the schema to a
/// ValidSchema object. Throws if the schema cannot be compiled to a valid
/// schema
AVRO_DECL void compileJsonSchema(std::istream &is, ValidSchema &schema);
/// Non-throwing version of compileJsonSchema.
///
/// \return True if no error, false if error (with the error string set)
///
AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema,
std::string &error);
// Variants that take the JSON text from an InputStream, a raw byte range, a
// C string, a std::string, or a file name, and return the ValidSchema by value.
// NOTE(review): presumably these throw on an invalid schema like the
// two-argument compileJsonSchema above - confirm against the implementation.
AVRO_DECL ValidSchema compileJsonSchemaFromStream(InputStream &is);
AVRO_DECL ValidSchema compileJsonSchemaFromMemory(const uint8_t *input, size_t len);
AVRO_DECL ValidSchema compileJsonSchemaFromString(const char *input);
AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string &input);
AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char *filename);
} // namespace avro
#endif

View File

@ -1,43 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Config_hh
#define avro_Config_hh
// Windows DLL support
//
// AVRO_DECL is the decoration placed on exported classes and functions. It is
// only given a non-empty value on Windows when AVRO_DYN_LINK is defined;
// otherwise the fallback at the bottom defines it as empty.
#ifdef _WIN32
// Silences MSVC warnings C4275 and C4251, both of which concern types without
// a DLL interface being used by DLL-exported classes.
#pragma warning(disable : 4275 4251)
#if defined(AVRO_DYN_LINK)
#ifdef AVRO_SOURCE
// NOTE(review): AVRO_SOURCE is presumably defined only while building the
// library itself, so symbols are exported here and imported by consumers.
#define AVRO_DECL __declspec(dllexport)
#else
#define AVRO_DECL __declspec(dllimport)
#endif // AVRO_SOURCE
#endif // AVRO_DYN_LINK
#include <intsafe.h>
// Make the POSIX-style name ssize_t available on Windows as an alias of SSIZE_T.
using ssize_t = SSIZE_T;
#endif // _WIN32
// Fallback: non-Windows builds, and Windows builds without AVRO_DYN_LINK.
#ifndef AVRO_DECL
#define AVRO_DECL
#endif
#endif

View File

@ -1,415 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_DataFile_hh__
#define avro_DataFile_hh__
#include "Config.hh"
#include "Encoder.hh"
#include "Specific.hh"
#include "Stream.hh"
#include "ValidSchema.hh"
#include "buffer/Buffer.hh"
#include <map>
#include <string>
#include <vector>
#include "array"
#include "boost/utility.hpp"
#include <boost/iostreams/filtering_stream.hpp>
namespace avro {
/**
* Specify type of compression to use when writing data files.
*
* SNAPPY_CODEC only exists when the build defines SNAPPY_CODEC_AVAILABLE, so
* code that names it must be guarded by the same macro.
*/
enum Codec {
// NOTE(review): presumably "no compression" (the Avro "null" codec) - confirm.
NULL_CODEC,
// NOTE(review): presumably the Avro "deflate" codec - confirm.
DEFLATE_CODEC,
#ifdef SNAPPY_CODEC_AVAILABLE
SNAPPY_CODEC
#endif
};
/// Number of bytes in a data-file sync value.
const int SyncSize = 16;

/**
 * The sync value: a fixed-size array of SyncSize bytes.
 */
using DataFileSync = std::array<uint8_t, SyncSize>;
/**
* Type-independent portion of DataFileWriter.
* At any given point in time, at most one file can be written using
* this object.
*
* The class is non-copyable (it derives from boost::noncopyable) and holds its
* output streams through std::unique_ptr.
*/
class AVRO_DECL DataFileWriterBase : boost::noncopyable {
const std::string filename_;
const ValidSchema schema_;
const EncoderPtr encoderPtr_;
// NOTE(review): presumably the buffered-byte threshold that triggers a sync
// in syncIfNeeded() - confirm against the implementation.
const size_t syncInterval_;
Codec codec_;
// NOTE(review): stream_ is presumably the destination file/stream and buffer_
// the in-memory block being assembled - confirm against the implementation.
std::unique_ptr<OutputStream> stream_;
std::unique_ptr<OutputStream> buffer_;
const DataFileSync sync_;
int64_t objectCount_;
typedef std::map<std::string, std::vector<uint8_t>> Metadata;
Metadata metadata_;
int64_t lastSync_;
static std::unique_ptr<OutputStream> makeStream(const char *filename);
static DataFileSync makeSync();
void writeHeader();
void setMetadata(const std::string &key, const std::string &value);
/**
* Generates a sync marker in the file.
*/
void sync();
/**
* Shared initialization portion used by the constructors.
* NOTE(review): the earlier wording justified this with "we aren't using
* C++11", which looks stale given the std::unique_ptr members - confirm.
*/
void init(const ValidSchema &schema, size_t syncInterval, const Codec &codec);
public:
/**
* Returns the current encoder for this writer.
*/
Encoder &encoder() const { return *encoderPtr_; }
/**
* Checks whether the buffer has sufficient data for a sync to be
* inserted. Returns nothing (the function is void).
* NOTE(review): presumably it also performs the sync when the check
* succeeds - confirm against the implementation.
*/
void syncIfNeeded();
/**
* Returns the byte offset (within the current file) of the start of the current block being written.
*/
uint64_t getCurrentBlockStart() const;
/**
* Increments the object count.
*/
void incr() {
++objectCount_;
}
/**
* Constructs a data file writer with the given sync interval and name.
* The second overload writes to a caller-supplied output stream, whose
* ownership is transferred to the writer, instead of a named file.
*/
DataFileWriterBase(const char *filename, const ValidSchema &schema,
size_t syncInterval, Codec codec = NULL_CODEC);
DataFileWriterBase(std::unique_ptr<OutputStream> outputStream,
const ValidSchema &schema, size_t syncInterval, Codec codec);
~DataFileWriterBase();
/**
* Closes the current file. Once closed this datafile object cannot be
* used for writing any more.
*/
void close();
/**
* Returns the schema for this data file.
*/
const ValidSchema &schema() const { return schema_; }
/**
* Flushes any unwritten data into the file.
*/
void flush();
};
/**
 * An Avro datafile that can store objects of type T.
 *
 * This is a thin typed wrapper: every operation is forwarded to an owned
 * DataFileWriterBase, and write() encodes the datum with avro::encode().
 * The class is non-copyable.
 */
template<typename T>
class DataFileWriter : boost::noncopyable {
    std::unique_ptr<DataFileWriterBase> base_;

public:
    /**
     * Constructs a new data file.
     *
     * \param filename     Name of the file to write.
     * \param schema       Schema of the objects that will be written.
     * \param syncInterval Sync interval handed to the base writer
     *                     (defaults to 16 KiB).
     * \param codec        Compression codec (defaults to NULL_CODEC).
     */
    DataFileWriter(const char *filename, const ValidSchema &schema,
                   size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC)
        : base_(new DataFileWriterBase(filename, schema, syncInterval, codec)) {}

    /**
     * Constructs a writer on top of a caller-supplied output stream; ownership
     * of the stream is transferred to the writer.
     */
    DataFileWriter(std::unique_ptr<OutputStream> outputStream, const ValidSchema &schema,
                   size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC)
        : base_(new DataFileWriterBase(std::move(outputStream), schema, syncInterval, codec)) {}

    /**
     * Writes the given piece of data into the file.
     */
    void write(const T &datum) {
        // Give the base writer the chance to sync before this datum is
        // encoded, then count the datum once it has been encoded.
        base_->syncIfNeeded();
        avro::encode(base_->encoder(), datum);
        base_->incr();
    }

    /**
     * Returns the byte offset (within the current file) of the start of the current block being written.
     *
     * Declared const, matching DataFileWriterBase::getCurrentBlockStart() and
     * schema(), so it can be called through a const reference to the writer.
     */
    uint64_t getCurrentBlockStart() const { return base_->getCurrentBlockStart(); }

    /**
     * Closes the current file. Once closed this datafile object cannot be
     * used for writing any more.
     */
    void close() { base_->close(); }

    /**
     * Returns the schema for this data file.
     */
    const ValidSchema &schema() const { return base_->schema(); }

    /**
     * Flushes any unwritten data into the file.
     */
    void flush() { base_->flush(); }
};
/**
* The type independent portion of reader.
* It declares the state and operations that do not depend on the datum
* type; the DataFileReader<T> template forwards to an instance of it.
*/
class AVRO_DECL DataFileReaderBase : boost::noncopyable {
const std::string filename_;
const std::unique_ptr<InputStream> stream_;
const DecoderPtr decoder_;
// Number of objects yet to read; decremented by decr().
int64_t objectCount_;
bool eof_;
Codec codec_;
int64_t blockStart_{};
int64_t blockEnd_{};
// Returned by readerSchema() and dataSchema() respectively.
ValidSchema readerSchema_;
ValidSchema dataSchema_;
// The decoder handed out by decoder().
DecoderPtr dataDecoder_;
std::unique_ptr<InputStream> dataStream_;
typedef std::map<std::string, std::vector<uint8_t>> Metadata;
Metadata metadata_;
DataFileSync sync_{};
// for compressed buffer
// NOTE(review): despite the "os_" name this is an input (istream) filter.
std::unique_ptr<boost::iostreams::filtering_istream> os_;
std::vector<char> compressed_;
// NOTE(review): lacks the trailing underscore the other members use.
std::string uncompressed;
// Private helpers; their definitions are outside this header.
void readHeader();
void readDataBlock();
void doSeek(int64_t position);
public:
/**
* Returns the current decoder for this reader.
*/
Decoder &decoder() { return *dataDecoder_; }
/**
* Returns true if and only if there is more to read.
*/
bool hasMore();
/**
* Decrements the number of objects yet to read.
*/
void decr() { --objectCount_; }
/**
* Constructs the reader for the given file.
* After constructing the DataFileReaderBase object, one of the init()
* overloads must be called exactly once to choose the schema to read
* with.
*/
explicit DataFileReaderBase(const char *filename);
/**
* Constructs the reader on top of the given input stream, which is taken
* as a std::unique_ptr by value (the caller gives up ownership).
* As with the filename overload, init() must be called afterwards.
*/
explicit DataFileReaderBase(std::unique_ptr<InputStream> inputStream);
/**
* Initializes the reader so that the reader and writer schemas
* are the same.
*/
void init();
/**
* Initializes the reader to read objects according to the given
* schema. This gives an opportunity for the reader to see the schema
* in the data file before deciding the right schema to use for reading.
* This must be called exactly once after constructing the
* DataFileReaderBase object.
*/
void init(const ValidSchema &readerSchema);
/**
* Returns the schema for this object.
*/
const ValidSchema &readerSchema() { return readerSchema_; }
/**
* Returns the schema stored with the data file.
*/
const ValidSchema &dataSchema() { return dataSchema_; }
/**
* Closes the reader. No further operation is possible on this reader.
*/
void close();
/**
* Move to a specific, known synchronization point, for example one returned
* from previousSync().
*/
void seek(int64_t position);
/**
* Move to the next synchronization point after a position. To process a
* range of file entries, call this with the starting position, then check
* pastSync() with the end point before each use of decoder().
*/
void sync(int64_t position);
/**
* Return true if past the next synchronization point after a position.
*/
bool pastSync(int64_t position);
/**
* Return the last synchronization point before our current position.
*/
int64_t previousSync() const;
};
/**
 * Reads the contents of data file one after another.
 *
 * This template is a thin typed wrapper: it owns a DataFileReaderBase and
 * forwards every operation to it, adding only the decoding of T.
 */
template<typename T>
class DataFileReader : boost::noncopyable {
    std::unique_ptr<DataFileReaderBase> base_;

public:
    /**
     * Constructs the reader for the given file and the reader is
     * expected to use the given schema.
     */
    DataFileReader(const char *filename, const ValidSchema &readerSchema)
        : base_(new DataFileReaderBase(filename)) {
        base_->init(readerSchema);
    }

    /**
     * Constructs the reader for the given input stream (ownership is moved
     * into the underlying base reader) and the reader is expected to use
     * the given schema.
     */
    DataFileReader(std::unique_ptr<InputStream> inputStream, const ValidSchema &readerSchema)
        : base_(new DataFileReaderBase(std::move(inputStream))) {
        base_->init(readerSchema);
    }

    /**
     * Constructs the reader for the given file and the reader is
     * expected to use the schema that is used with data.
     */
    explicit DataFileReader(const char *filename)
        : base_(new DataFileReaderBase(filename)) {
        base_->init();
    }

    /**
     * Constructs the reader for the given input stream (ownership is moved
     * into the underlying base reader) and the reader is expected to use
     * the schema that is used with data.
     */
    explicit DataFileReader(std::unique_ptr<InputStream> inputStream)
        : base_(new DataFileReaderBase(std::move(inputStream))) {
        base_->init();
    }

    /**
     * Constructs a reader using the reader base. This form of constructor
     * allows the user to examine the schema of a given file and then
     * decide to use the right type of data to be deserialized. Without this
     * the user must know the type of data for the template _before_
     * knowing the schema within the file.
     * The schema present in the data file will be used for reading
     * from this reader.
     * \p base must not be null: init() is called on it here.
     */
    explicit DataFileReader(std::unique_ptr<DataFileReaderBase> base)
        : base_(std::move(base)) {
        base_->init();
    }

    /**
     * Constructs a reader using the reader base. This form of constructor
     * allows the user to examine the schema of a given file and then
     * decide to use the right type of data to be deserialized. Without this
     * the user must know the type of data for the template _before_
     * knowing the schema within the file.
     * The argument readerSchema will be used for reading
     * from this reader.
     * \p base must not be null: init() is called on it here.
     */
    DataFileReader(std::unique_ptr<DataFileReaderBase> base,
                   const ValidSchema &readerSchema)
        : base_(std::move(base)) {
        base_->init(readerSchema);
    }

    /**
     * Reads the next entry from the data file.
     * \return true if an object has been successfully read into \p datum and
     * false if there are no more entries in the file.
     */
    bool read(T &datum) {
        if (base_->hasMore()) {
            // The remaining-object count is decremented before decoding,
            // exactly as in the original ordering.
            base_->decr();
            avro::decode(base_->decoder(), datum);
            return true;
        }
        return false;
    }

    /**
     * Returns the schema for this object.
     * Const-qualified: it only forwards a read-only query to the base reader.
     */
    const ValidSchema &readerSchema() const { return base_->readerSchema(); }

    /**
     * Returns the schema stored with the data file.
     * Const-qualified: it only forwards a read-only query to the base reader.
     */
    const ValidSchema &dataSchema() const { return base_->dataSchema(); }

    /**
     * Closes the reader. No further operation is possible on this reader.
     */
    void close() { base_->close(); }

    /**
     * Move to a specific, known synchronization point, for example one returned
     * from previousSync().
     */
    void seek(int64_t position) { base_->seek(position); }

    /**
     * Move to the next synchronization point after a position. To process a
     * range of file entries, call this with the starting position, then check
     * pastSync() with the end point before each call to read().
     */
    void sync(int64_t position) { base_->sync(position); }

    /**
     * Return true if past the next synchronization point after a position.
     */
    bool pastSync(int64_t position) { return base_->pastSync(position); }

    /**
     * Return the last synchronization point before our current position.
     * Const-qualified to match the const query on the base reader.
     */
    int64_t previousSync() const { return base_->previousSync(); }
};
} // namespace avro
#endif

View File

@ -1,225 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Decoder_hh__
#define avro_Decoder_hh__
#include "Config.hh"
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "Stream.hh"
#include "ValidSchema.hh"
/// \file
///
/// Low level support for decoding avro values.
/// This class has two types of functions. One type of functions support
/// decoding of leaf values (for example, decodeLong and
/// decodeString). These functions have analogs in Encoder.
///
/// The other type of functions support decoding of maps and arrays.
/// These functions are arrayStart, arrayNext, and skipArray
/// (and similar functions for maps).
namespace avro {
/**
 * Interface implemented by every decoder capable of decoding Avro data.
 */
class AVRO_DECL Decoder {
public:
    virtual ~Decoder() = default;

    /// Redirects all future decoding to \p is. The stream should stay valid
    /// until init() is called again or this Decoder is destructed.
    virtual void init(InputStream &is) = 0;

    /// Decodes a null from the current stream.
    virtual void decodeNull() = 0;

    /// Decodes a bool from the current stream.
    virtual bool decodeBool() = 0;

    /// Decodes a 32-bit int from the current stream.
    virtual int32_t decodeInt() = 0;

    /// Decodes a 64-bit signed int from the current stream.
    virtual int64_t decodeLong() = 0;

    /// Decodes a single-precision floating point number from the current stream.
    virtual float decodeFloat() = 0;

    /// Decodes a double-precision floating point number from the current stream.
    virtual double decodeDouble() = 0;

    /// Decodes a UTF-8 string from the current stream and returns it by value.
    /// Convenience wrapper around decodeString(std::string &).
    std::string decodeString() {
        std::string text;
        decodeString(text);
        return text;
    }

    /// Decodes a UTF-8 string from the stream and assigns it to \p value.
    virtual void decodeString(std::string &value) = 0;

    /// Skips a string on the current stream.
    virtual void skipString() = 0;

    /// Decodes arbitrary binary data from the current stream and returns it
    /// by value. Convenience wrapper around decodeBytes(std::vector<uint8_t> &).
    std::vector<uint8_t> decodeBytes() {
        std::vector<uint8_t> payload;
        decodeBytes(payload);
        return payload;
    }

    /// Decodes arbitrary binary data from the current stream and puts it
    /// in \p value.
    virtual void decodeBytes(std::vector<uint8_t> &value) = 0;

    /// Skips bytes on the current stream.
    virtual void skipBytes() = 0;

    /**
     * Decodes fixed length binary from the current stream.
     * Convenience wrapper around decodeFixed(size_t, std::vector<uint8_t> &).
     * \param[in] n The size (byte count) of the fixed being read.
     * \return The fixed data that has been read. The size of the returned
     * vector is guaranteed to be equal to \p n.
     */
    std::vector<uint8_t> decodeFixed(size_t n) {
        std::vector<uint8_t> fixed;
        decodeFixed(n, fixed);
        return fixed;
    }

    /**
     * Decodes a fixed from the current stream.
     * \param[in] n The size (byte count) of the fixed being read.
     * \param[out] value The value that receives the fixed. The vector will
     * be size-adjusted based on the fixed schema's size.
     */
    virtual void decodeFixed(size_t n, std::vector<uint8_t> &value) = 0;

    /// Skips fixed length binary on the current stream.
    virtual void skipFixed(size_t n) = 0;

    /// Decodes enum from the current stream.
    virtual size_t decodeEnum() = 0;

    /// Starts decoding an array. Returns the number of entries in the first chunk.
    virtual size_t arrayStart() = 0;

    /// Returns the number of entries in the next chunk; 0 if it was the last.
    virtual size_t arrayNext() = 0;

    /// Tries to skip an array. Returns 0 when it could; otherwise returns the
    /// number of elements the client has to skip individually, in which case
    /// skipArray is identical to arrayStart.
    virtual size_t skipArray() = 0;

    /// Starts decoding a map. Returns the number of entries in the first chunk.
    virtual size_t mapStart() = 0;

    /// Returns the number of entries in the next chunk; 0 if it was the last.
    virtual size_t mapNext() = 0;

    /// Tries to skip a map. Returns 0 when it could; otherwise returns the
    /// number of elements the client has to skip individually, in which case
    /// skipMap is identical to mapStart.
    virtual size_t skipMap() = 0;

    /// Decodes a branch of a union. The actual value is to follow.
    virtual size_t decodeUnionIndex() = 0;

    /// Drains any additional data at the end of the current entry in a stream
    /// and returns any unused bytes back to any underlying input stream.
    ///
    /// One situation where this matters: the reader's and the writer's schemas
    /// are both records but differ, and the writer's record has more fields at
    /// the end. Leaving such data unread is usually harmless, because when
    /// records are stored consecutively (e.g. in an Avro data file) reading
    /// the next record automatically skips the extra fields of the current
    /// one. The extra fields of the last record in the stream would still be
    /// left behind, though, so the stream would not be in a good state: if
    /// non-avro information follows the avro data, its consumers would see
    /// the bytes the decoder left unread. A similar set of problems occurs if
    /// the Decoder consumes more than it should.
    virtual void drain() = 0;
};
/**
* Shared pointer to Decoder.
* This std::shared_ptr alias is the handle type returned by the decoder
* factory functions declared in this header.
*/
using DecoderPtr = std::shared_ptr<Decoder>;
/**
* ResolvingDecoder is derived from \ref Decoder, with an additional
* function to obtain the field ordering of fields within a record.
*/
class AVRO_DECL ResolvingDecoder : public Decoder {
public:
/// Returns the order of fields for records.
/// The order in which fields are returned could differ from their
/// order in the schema, because the writer's field order could
/// be different. In order to avoid buffering values for later use,
/// we return the values in the writer's field order.
virtual const std::vector<size_t> &fieldOrder() = 0;
};
/**
* Shared pointer to ResolvingDecoder.
*/
using ResolvingDecoderPtr = std::shared_ptr<ResolvingDecoder>;
/**
* Returns a decoder that can decode the Avro binary encoding.
*/
AVRO_DECL DecoderPtr binaryDecoder();
/**
* Returns a decoder that validates the sequence of calls to an underlying
* Decoder against the given schema.
* \param schema The schema the calls are validated against.
* \param base The underlying decoder.
*/
AVRO_DECL DecoderPtr validatingDecoder(const ValidSchema &schema,
const DecoderPtr &base);
/**
* Returns a decoder that can decode the Avro JSON encoding.
* \param schema The schema of the data to decode.
*/
AVRO_DECL DecoderPtr jsonDecoder(const ValidSchema &schema);
/**
* Returns a decoder that decodes avro data from base written according to
* writerSchema and resolves against readerSchema.
* The client uses the decoder as if the data were written using readerSchema.
* \param writer The schema the data was written with (writerSchema).
* \param reader The schema the client reads with (readerSchema).
* \param base The underlying decoder that supplies the data.
* // FIXME: Handle out of order fields.
*/
AVRO_DECL ResolvingDecoderPtr resolvingDecoder(const ValidSchema &writer,
const ValidSchema &reader, const DecoderPtr &base);
} // namespace avro
#endif

View File

@ -1,173 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Encoder_hh__
#define avro_Encoder_hh__
#include "Config.hh"
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "Stream.hh"
#include "ValidSchema.hh"
/// \file
///
/// Low level support for encoding avro values.
/// This class has two types of functions. One type of functions support
/// the writing of leaf values (for example, encodeLong and
/// encodeString). These functions have analogs in Decoder.
///
/// The other type of functions support the writing of maps and arrays.
/// These functions are arrayStart, startItem, and arrayEnd
/// (and similar functions for maps).
/// Some implementations of Encoder handle the
/// buffering required to break large maps and arrays into blocks,
/// which is necessary for applications that want to do streaming.
namespace avro {
/**
 * The abstract base class for all Avro encoders. The implementations
 * differ in the method of encoding (binary versus JSON) or in capabilities
 * such as ability to verify the order of invocation of different functions.
 */
class AVRO_DECL Encoder {
public:
    virtual ~Encoder() = default;

    /// All future encodings will go to os, which should be valid until
    /// it is reset with another call to init() or the encoder is
    /// destructed.
    virtual void init(OutputStream &os) = 0;

    /// Flushes any data in internal buffers.
    virtual void flush() = 0;

    /// Returns the number of bytes produced so far.
    /// For a meaningful value, do a flush() before invoking this function.
    virtual int64_t byteCount() const = 0;

    /// Encodes a null to the current stream.
    virtual void encodeNull() = 0;

    /// Encodes a bool to the current stream.
    virtual void encodeBool(bool b) = 0;

    /// Encodes a 32-bit int to the current stream.
    virtual void encodeInt(int32_t i) = 0;

    /// Encodes a 64-bit signed int to the current stream.
    virtual void encodeLong(int64_t l) = 0;

    /// Encodes a single-precision floating point number to the current stream.
    virtual void encodeFloat(float f) = 0;

    /// Encodes a double-precision floating point number to the current stream.
    virtual void encodeDouble(double d) = 0;

    /// Encodes a UTF-8 string to the current stream.
    virtual void encodeString(const std::string &s) = 0;

    /**
     * Encodes arbitrary binary data into the current stream as Avro "bytes"
     * data type.
     * \param bytes Where the data is
     * \param len Number of bytes at \p bytes.
     */
    virtual void encodeBytes(const uint8_t *bytes, size_t len) = 0;

    /**
     * Encodes arbitrary binary data into the current stream as Avro "bytes"
     * data type.
     * \param bytes The data.
     */
    void encodeBytes(const std::vector<uint8_t> &bytes) {
        // data() of an empty vector may be null; hand the virtual overload
        // the address of a dummy byte (with length 0) instead.
        uint8_t b = 0;
        encodeBytes(bytes.empty() ? &b : bytes.data(), bytes.size());
    }

    /// Encodes fixed length binary to the current stream.
    virtual void encodeFixed(const uint8_t *bytes, size_t len) = 0;

    /**
     * Encodes an Avro data type Fixed.
     * \param bytes The fixed, the length of which is taken as the size
     * of fixed.
     */
    void encodeFixed(const std::vector<uint8_t> &bytes) {
        // Same guard as encodeBytes(): never forward the possibly-null
        // data() pointer of an empty vector to the virtual overload.
        uint8_t b = 0;
        encodeFixed(bytes.empty() ? &b : bytes.data(), bytes.size());
    }

    /// Encodes enum to the current stream.
    virtual void encodeEnum(size_t e) = 0;

    /// Indicates that an array of items is being encoded.
    virtual void arrayStart() = 0;

    /// Indicates that the current array of items has ended.
    virtual void arrayEnd() = 0;

    /// Indicates that a map of items is being encoded.
    virtual void mapStart() = 0;

    /// Indicates that the current map of items has ended.
    virtual void mapEnd() = 0;

    /// Indicates that count number of items are to follow in the current array
    /// or map.
    virtual void setItemCount(size_t count) = 0;

    /// Marks a beginning of an item in the current array or map.
    virtual void startItem() = 0;

    /// Encodes a branch of a union. The actual value is to follow.
    virtual void encodeUnionIndex(size_t e) = 0;
};
/**
* Shared pointer to Encoder.
* This std::shared_ptr alias is the handle type returned by the encoder
* factory functions declared below.
*/
using EncoderPtr = std::shared_ptr<Encoder>;
/**
* Returns an encoder that can encode the Avro binary encoding.
*/
AVRO_DECL EncoderPtr binaryEncoder();
/**
* Returns an encoder that validates the sequence of calls to an underlying
* Encoder against the given schema.
* \param schema The schema the calls are validated against.
* \param base The underlying encoder.
*/
AVRO_DECL EncoderPtr validatingEncoder(const ValidSchema &schema,
const EncoderPtr &base);
/**
* Returns an encoder that encodes the Avro JSON encoding.
* \param schema The schema of the data to encode.
*/
AVRO_DECL EncoderPtr jsonEncoder(const ValidSchema &schema);
/**
* Returns an encoder that encodes the Avro JSON encoding, pretty printed.
* \param schema The schema of the data to encode.
*/
AVRO_DECL EncoderPtr jsonPrettyEncoder(const ValidSchema &schema);
} // namespace avro
#endif

View File

@ -1,40 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Exception_hh__
#define avro_Exception_hh__
#include "Config.hh"
#include <boost/format.hpp>
#include <stdexcept>
namespace avro {
/**
 * A std::runtime_error wrapper that adds a convenience constructor
 * accepting boost::format objects.
 */
class AVRO_DECL Exception : public virtual std::runtime_error {
public:
    /// Uses \p msg verbatim as the error message.
    explicit Exception(const std::string &msg)
        : std::runtime_error(msg) {}

    /// Uses the string produced by the format object \p msg as the error message.
    explicit Exception(const boost::format &msg)
        : std::runtime_error(msg.str()) {}
};
} // namespace avro
#endif

View File

@ -1,152 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Generic_hh__
#define avro_Generic_hh__
#include <boost/utility.hpp>
#include "Config.hh"
#include "Decoder.hh"
#include "Encoder.hh"
#include "GenericDatum.hh"
#include "Types.hh"
namespace avro {
/**
* A utility class to read generic datum from decoders.
*/
class AVRO_DECL GenericReader : boost::noncopyable {
const ValidSchema schema_;
const bool isResolving_;
const DecoderPtr decoder_;
// Private static overload taking the isResolving flag explicitly; its
// definition is outside this header.
static void read(GenericDatum &datum, Decoder &d, bool isResolving);
public:
/**
* Constructs a reader for the given schema using the given decoder.
*/
GenericReader(ValidSchema s, const DecoderPtr &decoder);
/**
* Constructs a reader for the given reader's schema \c readerSchema
* using the given
* decoder which holds data matching writer's schema \c writerSchema.
*/
GenericReader(const ValidSchema &writerSchema,
const ValidSchema &readerSchema, const DecoderPtr &decoder);
/**
* Reads a value off the decoder.
*/
void read(GenericDatum &datum) const;
/**
* Drains any residual bytes in the input stream (e.g. because
* reader's schema has no use of them) and return unused bytes
* back to the underlying input stream.
* Implemented by forwarding to drain() on the held decoder.
*/
void drain() {
decoder_->drain();
}
/**
* Reads a generic datum from the stream.
* This overload takes no schema argument.
*/
static void read(Decoder &d, GenericDatum &g);
/**
* Reads a generic datum from the stream, using the given schema.
*/
static void read(Decoder &d, GenericDatum &g, const ValidSchema &s);
};
/**
* A utility class to write generic datum to encoders.
*/
class AVRO_DECL GenericWriter : boost::noncopyable {
const ValidSchema schema_;
const EncoderPtr encoder_;
// Private static overload with the datum as first argument; its
// definition is outside this header.
static void write(const GenericDatum &datum, Encoder &e);
public:
/**
* Constructs a writer for the given schema using the given encoder.
*/
GenericWriter(ValidSchema s, EncoderPtr encoder);
/**
* Writes a value onto the encoder.
*/
void write(const GenericDatum &datum) const;
/**
* Writes a generic datum on to the stream.
*/
static void write(Encoder &e, const GenericDatum &g);
/**
* Writes a generic datum on to the stream, using the given schema.
* Retained for backward compatibility.
* The schema argument is unnamed and unused: the call simply forwards to
* write(Encoder &, const GenericDatum &).
*/
static void write(Encoder &e, const GenericDatum &g, const ValidSchema &) {
write(e, g);
}
};
template<typename T>
struct codec_traits;
/**
* Specialization of codec_traits for Generic datum along with its schema.
* This is maintained for compatibility with old code. Please use the
* cleaner codec_traits<GenericDatum> instead.
*/
template<>
struct codec_traits<std::pair<ValidSchema, GenericDatum>> {
/** Encodes */
static void encode(Encoder &e,
const std::pair<ValidSchema, GenericDatum> &p) {
GenericWriter::write(e, p.second, p.first);
}
/** Decodes */
static void decode(Decoder &d, std::pair<ValidSchema, GenericDatum> &p) {
GenericReader::read(d, p.second, p.first);
}
};
/**
 * codec_traits specialization for GenericDatum: both operations forward
 * to the static helpers of GenericWriter and GenericReader.
 */
template<>
struct codec_traits<GenericDatum> {
    /** Encodes \p g onto \p e via GenericWriter::write. */
    static void encode(Encoder &e, const GenericDatum &g) { GenericWriter::write(e, g); }

    /** Decodes from \p d into \p g via GenericReader::read. */
    static void decode(Decoder &d, GenericDatum &g) { GenericReader::read(d, g); }
};
} // namespace avro
#endif

View File

@ -1,604 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_GenericDatum_hh__
#define avro_GenericDatum_hh__
#include <cstdint>
#include <map>
#include <string>
#include <vector>
#if __cplusplus >= 201703L
#include <any>
#else
#include "boost/any.hpp"
#endif
#include "LogicalType.hh"
#include "Node.hh"
#include "ValidSchema.hh"
namespace avro {
/**
* Generic datum which can hold any Avro type. The datum has a type
* and a value. The type is one of the Avro data types. The C++ type for
* value corresponds to the Avro type.
* \li An Avro <tt>null</tt> corresponds to no C++ type. It is illegal
* to try to access values for <tt>null</tt>.
* \li Avro <tt>boolean</tt> maps to C++ <tt>bool</tt>
* \li Avro <tt>int</tt> maps to C++ <tt>int32_t</tt>.
* \li Avro <tt>long</tt> maps to C++ <tt>int64_t</tt>.
* \li Avro <tt>float</tt> maps to C++ <tt>float</tt>.
* \li Avro <tt>double</tt> maps to C++ <tt>double</tt>.
* \li Avro <tt>string</tt> maps to C++ <tt>std::string</tt>.
* \li Avro <tt>bytes</tt> maps to C++ <tt>std::vector&lt;uint8_t&gt;</tt>.
* \li Avro <tt>fixed</tt> maps to C++ class <tt>GenericFixed</tt>.
* \li Avro <tt>enum</tt> maps to C++ class <tt>GenericEnum</tt>.
* \li Avro <tt>array</tt> maps to C++ class <tt>GenericArray</tt>.
* \li Avro <tt>map</tt> maps to C++ class <tt>GenericMap</tt>.
* \li There is no C++ type corresponding to Avro <tt>union</tt>. The
* object should have the C++ type corresponding to one of the constituent
* types of the union.
*
*/
class AVRO_DECL GenericDatum {
protected:
Type type_;
LogicalType logicalType_;
// The value is held type-erased: std::any when compiled as C++17 or
// later, boost::any otherwise.
#if __cplusplus >= 201703L
std::any value_;
#else
boost::any value_;
#endif
// Protected constructors: set the type (and optionally the logical type
// and an initial value) directly.
explicit GenericDatum(Type t)
: type_(t), logicalType_(LogicalType::NONE) {}
GenericDatum(Type t, LogicalType logicalType)
: type_(t), logicalType_(logicalType) {}
template<typename T>
GenericDatum(Type t, LogicalType logicalType, const T &v)
: type_(t), logicalType_(logicalType), value_(v) {}
// Called by the templated schema constructor before it assigns the value;
// the definition is outside this header.
void init(const NodePtr &schema);
public:
/**
* The avro data type this datum holds.
*/
Type type() const;
/**
* The avro logical type that augments the main data type this datum holds.
*/
LogicalType logicalType() const;
/**
* Returns the value held by this datum.
* T The type for the value. This must correspond to the
* avro type returned by type().
*/
template<typename T>
const T &value() const;
/**
* Returns the reference to the value held by this datum, which
* can be used to change the contents. Please note that only
* value can be changed, the data type of the value held cannot
* be changed.
*
* T The type for the value. This must correspond to the
* avro type returned by type().
*/
template<typename T>
T &value();
/**
* Returns true if and only if this datum is a union.
*/
bool isUnion() const { return type_ == AVRO_UNION; }
/**
* Returns the index of the current branch, if this is a union.
* \sa isUnion().
*/
size_t unionBranch() const;
/**
* Selects a new branch in the union if this is a union.
* \sa isUnion().
*/
void selectBranch(size_t branch);
/// Makes a new AVRO_NULL datum.
GenericDatum() : type_(AVRO_NULL), logicalType_(LogicalType::NONE) {}
/// Makes a new AVRO_BOOL datum whose value is of type bool.
/// We don't make this explicit constructor because we want to allow automatic conversion
// NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(bool v)
: type_(AVRO_BOOL), logicalType_(LogicalType::NONE), value_(v) {}
/// Makes a new AVRO_INT datum whose value is of type int32_t.
/// We don't make this explicit constructor because we want to allow automatic conversion
// NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(int32_t v)
: type_(AVRO_INT), logicalType_(LogicalType::NONE), value_(v) {}
/// Makes a new AVRO_LONG datum whose value is of type int64_t.
/// We don't make this explicit constructor because we want to allow automatic conversion
// NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(int64_t v)
: type_(AVRO_LONG), logicalType_(LogicalType::NONE), value_(v) {}
/// Makes a new AVRO_FLOAT datum whose value is of type float.
/// We don't make this explicit constructor because we want to allow automatic conversion
// NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(float v)
: type_(AVRO_FLOAT), logicalType_(LogicalType::NONE), value_(v) {}
/// Makes a new AVRO_DOUBLE datum whose value is of type double.
/// We don't make this explicit constructor because we want to allow automatic conversion
// NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(double v)
: type_(AVRO_DOUBLE), logicalType_(LogicalType::NONE), value_(v) {}
/// Makes a new AVRO_STRING datum whose value is of type std::string.
/// We don't make this explicit constructor because we want to allow automatic conversion
// NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(const std::string &v)
: type_(AVRO_STRING), logicalType_(LogicalType::NONE), value_(v) {}
/// Makes a new AVRO_BYTES datum whose value is of type
/// std::vector<uint8_t>.
/// We don't make this explicit constructor because we want to allow automatic conversion
// NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(const std::vector<uint8_t> &v) : type_(AVRO_BYTES), logicalType_(LogicalType::NONE), value_(v) {}
/**
* Constructs a datum corresponding to the given avro type.
* The value will be the appropriate default corresponding to the
* data type.
* \param schema The schema that defines the avro type.
*/
/// We don't make this explicit constructor because we want to allow automatic conversion
// NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(const NodePtr &schema);
/**
* Constructs a datum corresponding to the given avro type and set
* the value.
* \param schema The schema that defines the avro type.
* \param v The value for this type.
*
* NOTE(review): the pointer form of any_cast yields a null pointer when T
* is not the type currently held by value_, and the result is dereferenced
* here without a check. T presumably has to match exactly the C++ type
* that init(schema) stores -- confirm against the definition of init().
*/
template<typename T>
GenericDatum(const NodePtr &schema, const T &v) : type_(schema->type()), logicalType_(schema->logicalType()) {
init(schema);
#if __cplusplus >= 201703L
*std::any_cast<T>(&value_) = v;
#else
*boost::any_cast<T>(&value_) = v;
#endif
}
/**
* Constructs a datum corresponding to the given avro type.
* The value will be the appropriate default corresponding to the
* data type.
* \param schema The schema that defines the avro type.
*/
explicit GenericDatum(const ValidSchema &schema);
};
/**
 * Base class of the generic container types. It keeps the schema node the
 * container was constructed with.
 */
class AVRO_DECL GenericContainer {
    NodePtr schema_;

    static void assertType(const NodePtr &schema, Type type);

protected:
    /**
     * Constructs a container corresponding to the given schema \p s,
     * after which assertType() is called with \p s and \p type.
     */
    GenericContainer(Type type, const NodePtr &s)
        : schema_(s) {
        assertType(s, type);
    }

public:
    /// Returns the schema for this object.
    const NodePtr &schema() const { return schema_; }
};
/**
 * Generic container for unions. It tracks which branch is selected and
 * owns the datum for that branch.
 */
class AVRO_DECL GenericUnion : public GenericContainer {
    size_t curBranch_;
    GenericDatum datum_;

public:
    /**
     * Constructs a generic union for the schema \p schema, which should be
     * of Avro type union, and selects branch 0.
     *
     * curBranch_ starts at schema->leaves(), a value that differs from 0
     * whenever the union has at least one branch, so the selectBranch(0)
     * call is not short-circuited.
     */
    explicit GenericUnion(const NodePtr &schema) : GenericContainer(AVRO_UNION, schema), curBranch_(schema->leaves()) {
        selectBranch(0);
    }

    /// Index of the branch that is currently selected.
    size_t currentBranch() const {
        return curBranch_;
    }

    /**
     * Switches to another branch. When \p branch differs from the current
     * one, the held datum is replaced by a new one built from that
     * branch's schema; otherwise nothing happens.
     * \param branch The index of the branch to select.
     */
    void selectBranch(size_t branch) {
        if (curBranch_ == branch) {
            return;
        }
        datum_ = GenericDatum(schema()->leafAt(branch));
        curBranch_ = branch;
    }

    /// Mutable access to the datum of the currently selected branch.
    GenericDatum &datum() {
        return datum_;
    }

    /// Read-only access to the datum of the currently selected branch.
    const GenericDatum &datum() const {
        return datum_;
    }
};
/**
 * The generic container for Avro records.
 */
class AVRO_DECL GenericRecord : public GenericContainer {
    std::vector<GenericDatum> fields_;

public:
    /**
     * Constructs a generic record for the schema \p schema, which should
     * be of Avro type record.
     */
    explicit GenericRecord(const NodePtr &schema);

    /// Number of fields held by this record.
    size_t fieldCount() const { return fields_.size(); }

    /**
     * Looks up the position of the field called \p name.
     * Throws Exception when the schema has no field with that name.
     */
    size_t fieldIndex(const std::string &name) const {
        size_t pos = 0;
        if (schema()->nameIndex(name, pos)) {
            return pos;
        }
        throw Exception("Invalid field name: " + name);
    }

    /**
     * Returns true if a field with the given name \p name is located in
     * this record, false otherwise.
     */
    bool hasField(const std::string &name) const {
        size_t ignored = 0;
        return schema()->nameIndex(name, ignored);
    }

    /// Read-only access to the field called \p name; throws like fieldIndex().
    const GenericDatum &field(const std::string &name) const {
        const size_t pos = fieldIndex(name);
        return fieldAt(pos);
    }

    /// Mutable access to the field called \p name; throws like fieldIndex().
    GenericDatum &field(const std::string &name) {
        const size_t pos = fieldIndex(name);
        return fieldAt(pos);
    }

    /// Read-only access to the field at position \p pos (not range-checked).
    const GenericDatum &fieldAt(size_t pos) const { return fields_[pos]; }

    /// Mutable access to the field at position \p pos (not range-checked).
    GenericDatum &fieldAt(size_t pos) { return fields_[pos]; }

    /**
     * Overwrites the field at position \p pos with a copy of \p v.
     * The type of \p v is not compared with schema()->leafAt(pos).
     */
    void setFieldAt(size_t pos, const GenericDatum &v) {
        fields_[pos] = v;
    }
};
/**
 * The generic container for Avro arrays.
 */
class AVRO_DECL GenericArray : public GenericContainer {
public:
    /// Type used to hold the elements of the array.
    using Value = std::vector<GenericDatum>;

    /**
     * Constructs an empty generic array for the schema \p schema, which
     * should be of Avro type array.
     */
    explicit GenericArray(const NodePtr &schema) : GenericContainer(AVRO_ARRAY, schema) {}

    /// Read-only access to the elements.
    const Value &value() const { return value_; }

    /// Mutable access to the elements.
    Value &value() { return value_; }

private:
    Value value_;
};
/**
 * The generic container for Avro maps.
 */
class AVRO_DECL GenericMap : public GenericContainer {
public:
    /// Type used to hold the entries: a sequence of (key, datum) pairs.
    using Value = std::vector<std::pair<std::string, GenericDatum>>;

    /**
     * Constructs an empty generic map for the schema \p schema, which
     * should be of Avro type map.
     */
    explicit GenericMap(const NodePtr &schema) : GenericContainer(AVRO_MAP, schema) {}

    /// Read-only access to the entries.
    const Value &value() const { return value_; }

    /// Mutable access to the entries.
    Value &value() { return value_; }

private:
    Value value_;
};
/**
 * Generic container for Avro enum. The current value is stored as the
 * cardinal (position) of a symbol in the schema.
 */
class AVRO_DECL GenericEnum : public GenericContainer {
    size_t value_;

    /// Looks up \p symbol in \p schema and returns its cardinal.
    /// Throws Exception when the schema does not define the symbol.
    static size_t index(const NodePtr &schema, const std::string &symbol) {
        size_t result = 0;
        if (schema->nameIndex(symbol, result)) {
            return result;
        }
        throw Exception("No such symbol");
    }

public:
    /**
     * Constructs a generic enum corresponding to the given schema \p schema,
     * which should be of Avro type enum. The value starts at cardinal 0.
     */
    explicit GenericEnum(const NodePtr &schema) : GenericContainer(AVRO_ENUM, schema), value_(0) {
    }

    /**
     * Constructs a generic enum for \p schema whose value is \p symbol.
     * Throws Exception when \p symbol is not defined by the schema.
     */
    GenericEnum(const NodePtr &schema, const std::string &symbol) : GenericContainer(AVRO_ENUM, schema), value_(index(schema, symbol)) {
    }

    /**
     * Returns the symbol corresponding to the cardinal \p n. If the
     * value for \p n is not within the limits an exception is thrown.
     * The method does not modify the enum, so it is const-qualified and
     * usable on const objects.
     */
    const std::string &symbol(size_t n) const {
        if (n < schema()->names()) {
            return schema()->nameAt(n);
        }
        throw Exception("Not as many symbols");
    }

    /**
     * Returns the cardinal for the given symbol \c symbol. If the symbol
     * is not defined for this enum an exception is thrown.
     */
    size_t index(const std::string &symbol) const {
        return index(schema(), symbol);
    }

    /**
     * Set the value for this enum corresponding to the given symbol \c symbol.
     * Returns the new cardinal; throws Exception for an unknown symbol.
     */
    size_t set(const std::string &symbol) {
        return value_ = index(symbol);
    }

    /**
     * Set the value for this enum corresponding to the given cardinal \c n.
     * Throws Exception when \c n is not smaller than the number of symbols.
     */
    void set(size_t n) {
        if (n < schema()->names()) {
            value_ = n;
            return;
        }
        throw Exception("Not as many symbols");
    }

    /**
     * Returns the cardinal for the current value of this enum.
     */
    size_t value() const {
        return value_;
    }

    /**
     * Returns the symbol for the current value of this enum.
     */
    const std::string &symbol() const {
        return schema()->nameAt(value_);
    }
};
/**
 * Generic container for Avro fixed.
 */
class AVRO_DECL GenericFixed : public GenericContainer {
    std::vector<uint8_t> value_;

public:
    /**
     * Constructs a generic fixed for the schema \p schema, which should be
     * of Avro type fixed. The contents are schema->fixedSize() zero bytes.
     */
    explicit GenericFixed(const NodePtr &schema)
        : GenericContainer(AVRO_FIXED, schema), value_(schema->fixedSize()) {}

    /// Constructs a generic fixed for \p schema from the bytes \p v.
    GenericFixed(const NodePtr &schema, const std::vector<uint8_t> &v);

    /// Read-only access to the bytes of this fixed.
    const std::vector<uint8_t> &value() const { return value_; }

    /// Mutable access to the bytes of this fixed.
    std::vector<uint8_t> &value() { return value_; }
};
/// Returns the avro type of this datum. For a union, the answer comes
/// from the datum of the currently selected branch.
inline Type GenericDatum::type() const {
    if (type_ != AVRO_UNION) {
        return type_;
    }
#if __cplusplus >= 201703L
    return std::any_cast<GenericUnion>(&value_)->datum().type();
#else
    return boost::any_cast<GenericUnion>(&value_)->datum().type();
#endif
}
/// Returns the logical type of this datum. For a union, the answer comes
/// from the datum of the currently selected branch.
inline LogicalType GenericDatum::logicalType() const {
    if (type_ != AVRO_UNION) {
        return logicalType_;
    }
#if __cplusplus >= 201703L
    return std::any_cast<GenericUnion>(&value_)->datum().logicalType();
#else
    return boost::any_cast<GenericUnion>(&value_)->datum().logicalType();
#endif
}
/// Returns a mutable reference to the held value as a T. For a union,
/// the request is forwarded to the datum of the selected branch.
// NOTE(review): the pointer form of any_cast yields a null pointer when the
// held value is not a T, so the caller presumably has to ask for the
// matching type -- confirm.
template<typename T>
T &GenericDatum::value() {
    if (type_ == AVRO_UNION) {
#if __cplusplus >= 201703L
        return std::any_cast<GenericUnion>(&value_)->datum().value<T>();
#else
        return boost::any_cast<GenericUnion>(&value_)->datum().value<T>();
#endif
    }
#if __cplusplus >= 201703L
    return *std::any_cast<T>(&value_);
#else
    return *boost::any_cast<T>(&value_);
#endif
}
/// Returns a read-only reference to the held value as a T. For a union,
/// the request is forwarded to the datum of the selected branch.
// NOTE(review): the pointer form of any_cast yields a null pointer when the
// held value is not a T, so the caller presumably has to ask for the
// matching type -- confirm.
template<typename T>
const T &GenericDatum::value() const {
    if (type_ == AVRO_UNION) {
#if __cplusplus >= 201703L
        return std::any_cast<GenericUnion>(&value_)->datum().value<T>();
#else
        return boost::any_cast<GenericUnion>(&value_)->datum().value<T>();
#endif
    }
#if __cplusplus >= 201703L
    return *std::any_cast<T>(&value_);
#else
    return *boost::any_cast<T>(&value_);
#endif
}
inline size_t GenericDatum::unionBranch() const {
#if __cplusplus >= 201703L
return std::any_cast<GenericUnion>(&value_)->currentBranch();
#else
return boost::any_cast<GenericUnion>(&value_)->currentBranch();
#endif
}
inline void GenericDatum::selectBranch(size_t branch) {
#if __cplusplus >= 201703L
std::any_cast<GenericUnion>(&value_)->selectBranch(branch);
#else
boost::any_cast<GenericUnion>(&value_)->selectBranch(branch);
#endif
}
} // namespace avro
#endif // avro_GenericDatum_hh__

View File

@ -1,68 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Layout_hh__
#define avro_Layout_hh__
#include "Config.hh"
#include <boost/noncopyable.hpp>
/// \file Layout.hh
///
namespace avro {
/// Base class of the layout descriptions. It records a single offset that
/// is fixed at construction time; instances cannot be copied.
class AVRO_DECL Layout : private boost::noncopyable {
protected:
    /// Only derived classes may construct a Layout; \p offset defaults to 0.
    explicit Layout(size_t offset = 0) : offset_(offset) {}

public:
    /// The offset given at construction.
    size_t offset() const { return offset_; }

    virtual ~Layout() = default;

private:
    const size_t offset_;
};
/// A Layout with no state of its own; it only forwards the offset to the
/// base class.
class AVRO_DECL PrimitiveLayout : public Layout {
public:
    /// \param offset Offset handed to Layout; defaults to 0.
    explicit PrimitiveLayout(size_t offset = 0)
        : Layout(offset) {}
};
/// A Layout that owns an ordered list of child layouts.
class AVRO_DECL CompoundLayout : public Layout {
public:
    /// \param offset Offset handed to Layout; defaults to 0.
    explicit CompoundLayout(size_t offset = 0)
        : Layout(offset) {}

    /// Appends \p layout to the list of children. Ownership is moved out
    /// of the caller's pointer, which is left empty afterwards.
    void add(std::unique_ptr<Layout> &layout) {
        layouts_.emplace_back(std::move(layout));
    }

    /// Returns the child at position \p idx. The lookup is range-checked
    /// by std::vector::at.
    const Layout &at(size_t idx) const {
        const std::unique_ptr<Layout> &child = layouts_.at(idx);
        return *child;
    }

private:
    std::vector<std::unique_ptr<Layout>> layouts_;
};
} // namespace avro
#endif

View File

@ -1,65 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_LogicalType_hh__
#define avro_LogicalType_hh__
#include <iostream>
#include "Config.hh"
namespace avro {
/// Describes the logical type attached to a schema node: a kind taken
/// from the Type enum plus a precision and a scale.
class AVRO_DECL LogicalType {
public:
    /// The logical types known to this class; NONE means no logical type.
    enum Type {
        NONE,
        DECIMAL,
        DATE,
        TIME_MILLIS,
        TIME_MICROS,
        TIMESTAMP_MILLIS,
        TIMESTAMP_MICROS,
        DURATION,
        UUID
    };

    explicit LogicalType(Type type);

    /// The kind of logical type.
    Type type() const;

    // Precision and scale are meaningful for the DECIMAL logical type only.
    // Precision must be positive; scale must be positive or zero. Calling
    // either setter on a type other than DECIMAL throws an exception.
    void setPrecision(int precision);
    int precision() const {
        return precision_;
    }

    void setScale(int scale);
    int scale() const {
        return scale_;
    }

    /// Writes this logical type to \p os.
    void printJson(std::ostream &os) const;

private:
    Type type_;
    int precision_;
    int scale_;
};
} // namespace avro
#endif

View File

@ -1,204 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Node_hh__
#define avro_Node_hh__
#include "Config.hh"
#include <boost/noncopyable.hpp>
#include <cassert>
#include <memory>
#include <utility>
#include "Exception.hh"
#include "LogicalType.hh"
#include "SchemaResolution.hh"
#include "Types.hh"
namespace avro {
class Node;
class GenericDatum;
using NodePtr = std::shared_ptr<Node>;
/// A schema name made of a namespace part and a simple-name part.
class AVRO_DECL Name {
    std::string ns_;
    std::string simpleName_;

public:
    /// Creates a name whose two parts are empty.
    Name() = default;

    /// Creates a name from the full name \p fullname.
    explicit Name(const std::string &fullname);

    /// Creates a name from its two parts and then runs check() on it.
    Name(std::string simpleName, std::string ns) : ns_(std::move(ns)), simpleName_(std::move(simpleName)) {
        check();
    }

    std::string fullname() const;

    /// The namespace part.
    const std::string &ns() const {
        return ns_;
    }

    /// The simple-name part.
    const std::string &simpleName() const {
        return simpleName_;
    }

    /// Replaces the namespace part; check() is not run.
    void ns(std::string n) {
        ns_ = std::move(n);
    }

    /// Replaces the simple-name part; check() is not run.
    void simpleName(std::string n) {
        simpleName_ = std::move(n);
    }

    void fullname(const std::string &n);

    bool operator<(const Name &n) const;

    void check() const;

    bool operator==(const Name &n) const;

    /// The exact negation of operator==.
    bool operator!=(const Name &n) const {
        return !operator==(n);
    }

    /// Empties both parts of the name.
    void clear() {
        ns_.clear();
        simpleName_.clear();
    }

    /// Explicit conversion that yields fullname().
    explicit operator std::string() const {
        return fullname();
    }
};
/// Streams the full name of \p n to \p os and returns \p os.
inline std::ostream &operator<<(std::ostream &os, const Name &n) {
    os << n.fullname();
    return os;
}
/// Node is the building block for parse trees. Each node represents an avro
/// type. Compound types have leaf nodes that represent the types they are
/// composed of.
///
/// The user does not use the Node object directly, they interface with Schema
/// objects.
///
/// The Node object uses reference-counted pointers. This is so that schemas
/// may be reused in other schemas, without needing to worry about memory
/// deallocation for nodes that are added to multiple schema parse trees.
///
/// Node has minimal implementation, serving as an abstract base class for
/// different node types.
///
/// The public mutators (setName, setDoc, addLeaf, addName, setFixedSize)
/// first call checkLock(), which throws once lock() has been called, and
/// then delegate to the corresponding protected do...() virtual function.
///
class AVRO_DECL Node : private boost::noncopyable {
public:
/// Creates an unlocked node of type \p type with logical type NONE.
explicit Node(Type type) : type_(type),
logicalType_(LogicalType::NONE),
locked_(false) {}
virtual ~Node();
/// Returns the avro type given at construction.
Type type() const {
return type_;
}
/// Returns a copy of the logical type of this node.
LogicalType logicalType() const {
return logicalType_;
}
void setLogicalType(LogicalType logicalType);
/// Marks the node as locked; this class offers no way to unlock it again.
void lock() {
locked_ = true;
}
/// Returns true once lock() has been called.
bool locked() const {
return locked_;
}
virtual bool hasName() const = 0;
/// Sets the name of the node. Throws Exception when the node is locked;
/// \p name is validated with checkName() before it is stored.
void setName(const Name &name) {
checkLock();
checkName(name);
doSetName(name);
}
virtual const Name &name() const = 0;
virtual const std::string &getDoc() const = 0;
/// Sets the doc string. Throws Exception when the node is locked.
void setDoc(const std::string &doc) {
checkLock();
doSetDoc(doc);
}
/// Adds \p newLeaf as a leaf. Throws Exception when the node is locked.
void addLeaf(const NodePtr &newLeaf) {
checkLock();
doAddLeaf(newLeaf);
}
virtual size_t leaves() const = 0;
virtual const NodePtr &leafAt(size_t index) const = 0;
/// Returns the default value for the leaf at \p index. This base
/// implementation always throws Exception; derived classes may override it.
virtual const GenericDatum &defaultValueAt(size_t index) {
throw Exception(boost::format("No default value at: %1%") % index);
}
/// Adds a leaf name. Throws Exception when the node is locked; \p name is
/// wrapped in a Name and validated with checkName() before it is stored.
void addName(const std::string &name) {
checkLock();
checkName(Name(name));
doAddName(name);
}
virtual size_t names() const = 0;
virtual const std::string &nameAt(size_t index) const = 0;
/// Looks up \p name; the result is reported through the return value and
/// the \p index out-parameter.
virtual bool nameIndex(const std::string &name, size_t &index) const = 0;
/// Sets the fixed size. Throws Exception when the node is locked.
void setFixedSize(size_t size) {
checkLock();
doSetFixedSize(size);
}
virtual size_t fixedSize() const = 0;
virtual bool isValid() const = 0;
virtual SchemaResolution resolve(const Node &reader) const = 0;
virtual void printJson(std::ostream &os, size_t depth) const = 0;
virtual void printBasicInfo(std::ostream &os) const = 0;
virtual void setLeafToSymbolic(size_t index, const NodePtr &node) = 0;
// Serialize the default value GenericDatum g for the node contained
// in a record node.
virtual void printDefaultToJson(const GenericDatum &g, std::ostream &os,
size_t depth) const = 0;
protected:
/// Throws Exception("Cannot modify locked schema") when the node is locked.
void checkLock() const {
if (locked()) {
throw Exception("Cannot modify locked schema");
}
}
/// Validates \p name by calling Name::check(); derived classes may override.
virtual void checkName(const Name &name) const {
name.check();
}
// Storage hooks implemented by derived classes; the public mutators above
// call them after the lock check.
virtual void doSetName(const Name &name) = 0;
virtual void doSetDoc(const std::string &name) = 0;
virtual void doAddLeaf(const NodePtr &newLeaf) = 0;
virtual void doAddName(const std::string &name) = 0;
virtual void doSetFixedSize(size_t size) = 0;
private:
const Type type_;
LogicalType logicalType_;
bool locked_;
};
} // namespace avro
namespace std {
/// Streams \p n to \p os by calling its printJson() with a depth of 0,
/// then returns \p os.
// NOTE(review): this overload is declared inside namespace std; the C++
// standard does not allow user code to add function overloads there.
// Confirm whether it can be moved to namespace avro.
inline std::ostream &operator<<(std::ostream &os, const avro::Node &n) {
n.printJson(os, 0);
return os;
}
} // namespace std
#endif

View File

@ -1,201 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_NodeConcepts_hh__
#define avro_NodeConcepts_hh__
#include "Config.hh"
#include "Exception.hh"
#include <map>
#include <vector>
namespace avro {
///
/// The concept classes are used to simplify NodeImpl. Since different types
/// of avro types carry different attributes, such as names, or field names for
/// record members. Using the concept class of NoAttribute vs Attribute, the
/// NodeImpl object can enable/disable the attribute, but the code is the same
/// in either case.
///
/// Furthermore, attributes may have different types, for example, most
/// attributes are strings, but fixed types have a size attribute, which is
/// integer.
///
/// Since compound types are composed of other types, the leaf attribute
/// concepts extend a NodeImpl to include leaf nodes, and attributes for leaf
/// nodes, which are used to build parse trees.
///
///
namespace concepts {
/// Placeholder used when a node type does not carry a given attribute.
/// It reports a size of 0, and every attempt to store or read a value
/// throws Exception.
template<typename Attribute>
struct NoAttribute {
    static const bool hasAttribute = false;

    /// Always 0: nothing is ever stored.
    size_t size() const { return 0; }

    /// Required by the generic NodeImpl code. The Node APIs are meant to
    /// make sure it is never called; the throw is a safety net.
    void add(const Attribute & /* attr */) {
        throw Exception("This type does not have attribute");
    }

    /// Required by the generic NodeImpl code. The Node APIs are meant to
    /// make sure it is never called; the throw is a safety net.
    const Attribute &get(size_t /* index */ = 0) const {
        throw Exception("This type does not have attribute");
    }

    /// Non-const counterpart of get(); it throws in the same way.
    Attribute &get(size_t /* index */ = 0) {
        throw Exception("This type does not have attribute");
    }
};
/// Holder for exactly one attribute value. size() is always 1 and index 0
/// is the only valid index.
template<typename Attribute>
struct SingleAttribute {
    static const bool hasAttribute = true;

    /// Value-initializes the attribute.
    SingleAttribute() : attr_() {}

    /// Stores a copy of \p a.
    explicit SingleAttribute(const Attribute &a) : attr_(a) {}

    /// Copying from another SingleAttribute copies its value.
    SingleAttribute(const SingleAttribute<Attribute> &rhs) : attr_(rhs.attr_) {}

    /// Converting from a NoAttribute value-initializes the attribute.
    explicit SingleAttribute(const NoAttribute<Attribute> & /* rhs */) : attr_() {}

    /// Always 1.
    size_t size() const { return 1; }

    /// Replaces the stored value with \p attr.
    void add(const Attribute &attr) { attr_ = attr; }

    /// Read-only access to the value; throws Exception unless \p index is 0.
    const Attribute &get(size_t index = 0) const {
        if (index == 0) {
            return attr_;
        }
        throw Exception("SingleAttribute has only 1 value");
    }

    /// Mutable access to the value; throws Exception unless \p index is 0.
    Attribute &get(size_t index = 0) {
        if (index == 0) {
            return attr_;
        }
        throw Exception("SingleAttribute has only 1 value");
    }

private:
    // MultiAttribute reads attr_ directly when it is built from a
    // SingleAttribute.
    template<typename T>
    friend struct MultiAttribute;

    Attribute attr_;
};
/// Holder for an ordered list of attribute values.
template<typename Attribute>
struct MultiAttribute {
    static const bool hasAttribute = true;

    /// Creates an empty list.
    MultiAttribute() = default;

    /// Copy constructing from a single attribute is allowed; it pushes the
    /// attribute as the first element.
    explicit MultiAttribute(const SingleAttribute<Attribute> &rhs) {
        // since map is the only type that does this we know its
        // final size will be two, so reserve
        attrs_.reserve(2);
        attrs_.push_back(rhs.attr_);
    }

    /// Copies the whole list of \p rhs.
    MultiAttribute(const MultiAttribute<Attribute> &rhs) : attrs_(rhs.attrs_) {}

    /// Converting from a NoAttribute yields an empty list.
    explicit MultiAttribute(const NoAttribute<Attribute> & /* rhs */) {}

    /// Number of stored values.
    size_t size() const {
        return attrs_.size();
    }

    /// Appends a copy of \p attr.
    void add(const Attribute &attr) {
        attrs_.push_back(attr);
    }

    /// Read-only access to the value at \p index (default 0).
    /// std::vector::at throws std::out_of_range for an invalid index.
    const Attribute &get(size_t index = 0) const {
        return attrs_.at(index);
    }

    /// Mutable access to the value at \p index. The index defaults to 0,
    /// matching the const overload and SingleAttribute::get.
    /// std::vector::at throws std::out_of_range for an invalid index.
    Attribute &get(size_t index = 0) {
        return attrs_.at(index);
    }

private:
    std::vector<Attribute> attrs_;
};
/// Primary template, used for leaf-name holders that keep no name index.
/// Both operations throw Exception unconditionally.
template<typename T>
struct NameIndexConcept {
    /// Never returns; throws Exception.
    bool lookup(const std::string & /* name */, size_t & /* index */) const {
        throw Exception("Name index does not exist");
    }

    /// Never returns; throws Exception.
    bool add(const ::std::string & /* name */, size_t /* index */) {
        throw Exception("Name index does not exist");
    }
};
/// Specialization for MultiAttribute<std::string>: keeps a map from each
/// name to its index.
template<>
struct NameIndexConcept<MultiAttribute<std::string>> {
    using IndexMap = std::map<std::string, size_t>;

    /// Finds \p name. On success stores its index in \p index and returns
    /// true; otherwise returns false and leaves \p index untouched.
    bool lookup(const std::string &name, size_t &index) const {
        const auto found = map_.find(name);
        if (found != map_.end()) {
            index = found->second;
            return true;
        }
        return false;
    }

    /// Registers \p name with \p index unless the name is already present.
    /// Returns true when the entry was inserted, false for a duplicate
    /// (the existing entry is kept).
    bool add(const ::std::string &name, size_t index) {
        return map_.insert(IndexMap::value_type(name, index)).second;
    }

private:
    IndexMap map_;
};
} // namespace concepts
} // namespace avro
#endif

View File

@ -1,539 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_NodeImpl_hh__
#define avro_NodeImpl_hh__
#include "Config.hh"
#include "GenericDatum.hh"
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
#include "Node.hh"
#include "NodeConcepts.hh"
namespace avro {
/// Implementation details for Node. NodeImpl represents all the avro types,
/// whose properties are enabled and disabled by selecting concept classes.
///
/// Template parameters (attribute holders from namespace concepts):
///  - NameConcept: storage for the node's own name.
///  - LeavesConcept: storage for the leaf nodes.
///  - LeafNamesConcept: storage for the names attached to the node
///    (record field names, enum symbols).
///  - SizeConcept: storage for the value reported by fixedSize().
template<
class NameConcept,
class LeavesConcept,
class LeafNamesConcept,
class SizeConcept>
class NodeImpl : public Node {
protected:
/// Creates a node of type \p type with every attribute value-initialized.
explicit NodeImpl(Type type) : Node(type),
nameAttribute_(),
docAttribute_(),
leafAttributes_(),
leafNameAttributes_(),
sizeAttribute_() {}
/// Creates a node from ready-made attributes; the doc attribute is
/// value-initialized. nameIndex_ is not filled in here.
NodeImpl(Type type,
const NameConcept &name,
const LeavesConcept &leaves,
const LeafNamesConcept &leafNames,
const SizeConcept &size) : Node(type),
nameAttribute_(name),
docAttribute_(),
leafAttributes_(leaves),
leafNameAttributes_(leafNames),
sizeAttribute_(size) {}
// Ctor with "doc"
NodeImpl(Type type,
const NameConcept &name,
const concepts::SingleAttribute<std::string> &doc,
const LeavesConcept &leaves,
const LeafNamesConcept &leafNames,
const SizeConcept &size) : Node(type),
nameAttribute_(name),
docAttribute_(doc),
leafAttributes_(leaves),
leafNameAttributes_(leafNames),
sizeAttribute_(size) {}
/// Exchanges every attribute and the name index with \p impl.
void swap(NodeImpl &impl) {
std::swap(nameAttribute_, impl.nameAttribute_);
std::swap(docAttribute_, impl.docAttribute_);
std::swap(leafAttributes_, impl.leafAttributes_);
std::swap(leafNameAttributes_, impl.leafNameAttributes_);
std::swap(sizeAttribute_, impl.sizeAttribute_);
std::swap(nameIndex_, impl.nameIndex_);
}
bool hasName() const override {
// e.g.: true for single and multi-attributes, false for no-attributes.
return NameConcept::hasAttribute;
}
/// Stores \p name in the name attribute.
void doSetName(const Name &name) override {
nameAttribute_.add(name);
}
/// Returns the stored name (index 0 of the name attribute).
const Name &name() const override {
return nameAttribute_.get();
}
/// Stores \p doc in the doc attribute.
void doSetDoc(const std::string &doc) override {
docAttribute_.add(doc);
}
/// Returns the stored doc string.
const std::string &getDoc() const override {
return docAttribute_.get();
}
/// Adds \p newLeaf to the leaf attribute; final, so derived classes
/// cannot override it.
void doAddLeaf(const NodePtr &newLeaf) final {
leafAttributes_.add(newLeaf);
}
/// Number of leaves.
size_t leaves() const override {
return leafAttributes_.size();
}
/// Returns the leaf at \p index.
const NodePtr &leafAt(size_t index) const override {
return leafAttributes_.get(index);
}
/// Registers \p name in the name index under the next free position and
/// then appends it to the leaf names. Throws Exception when the index
/// reports the name as a duplicate.
void doAddName(const std::string &name) override {
if (!nameIndex_.add(name, leafNameAttributes_.size())) {
throw Exception(boost::format("Cannot add duplicate name: %1%") % name);
}
leafNameAttributes_.add(name);
}
/// Number of leaf names.
size_t names() const override {
return leafNameAttributes_.size();
}
/// Returns the leaf name at \p index.
const std::string &nameAt(size_t index) const override {
return leafNameAttributes_.get(index);
}
/// Looks \p name up in the name index; the position is written to \p index.
bool nameIndex(const std::string &name, size_t &index) const override {
return nameIndex_.lookup(name, index);
}
/// Stores \p size in the size attribute.
void doSetFixedSize(size_t size) override {
sizeAttribute_.add(size);
}
/// Returns the stored size.
size_t fixedSize() const override {
return sizeAttribute_.get();
}
bool isValid() const override = 0;
void printBasicInfo(std::ostream &os) const override;
void setLeafToSymbolic(size_t index, const NodePtr &node) override;
/// Resolution fallback for readers that are symbolic or union nodes.
/// A symbolic reader is resolved against its leaf 0. For a union reader
/// every leaf is tried in order: the first RESOLVE_MATCH wins, otherwise
/// the first result other than RESOLVE_NO_MATCH is kept. Any other reader
/// type yields RESOLVE_NO_MATCH.
SchemaResolution furtherResolution(const Node &reader) const {
SchemaResolution match = RESOLVE_NO_MATCH;
if (reader.type() == AVRO_SYMBOLIC) {
// resolve the symbolic type, and check again
const NodePtr &node = reader.leafAt(0);
match = resolve(*node);
} else if (reader.type() == AVRO_UNION) {
// in this case, need to see if there is an exact match for the
// writer's type, or if not, the first one that can be promoted to a
// match
for (size_t i = 0; i < reader.leaves(); ++i) {
const NodePtr &node = reader.leafAt(i);
SchemaResolution thisMatch = resolve(*node);
// if matched then the search is done
if (thisMatch == RESOLVE_MATCH) {
match = thisMatch;
break;
}
// thisMatch is either no match, or promotable, this will set match to
// promotable if it hasn't been set already
if (match == RESOLVE_NO_MATCH) {
match = thisMatch;
}
}
}
return match;
}
// Attribute storage. The declaration order is also the order in which the
// constructors above initialize the members.
NameConcept nameAttribute_;
// Rem: NameConcept type is HasName (= SingleAttribute<Name>), we use std::string instead
concepts::SingleAttribute<std::string> docAttribute_; /** Doc used to compare schemas */
LeavesConcept leafAttributes_;
LeafNamesConcept leafNameAttributes_;
SizeConcept sizeAttribute_;
// Name-to-position lookup for the leaf names; it is filled by doAddName()
// and is not populated by the constructors of this class.
concepts::NameIndexConcept<LeafNamesConcept> nameIndex_;
};
// Shorthand names for the attribute holders that are plugged into NodeImpl.
// Name of the node itself: absent, or exactly one Name.
using NoName = concepts::NoAttribute<Name>;
using HasName = concepts::SingleAttribute<Name>;
// Documentation string of the node.
using HasDoc = concepts::SingleAttribute<std::string>;
// Leaf nodes: none, exactly one, or a list.
using NoLeaves = concepts::NoAttribute<NodePtr>;
using SingleLeaf = concepts::SingleAttribute<NodePtr>;
using MultiLeaves = concepts::MultiAttribute<NodePtr>;
// Names attached to the node (field names, enum symbols): none, or a list.
using NoLeafNames = concepts::NoAttribute<std::string>;
using LeafNames = concepts::MultiAttribute<std::string>;
// Size attribute: absent, or exactly one value.
// NOTE(review): the size is held as int, while NodeImpl::doSetFixedSize()
// and NodeImpl::fixedSize() take and return size_t, so the value is
// converted on the way in and out -- confirm the narrowing is intended.
using NoSize = concepts::NoAttribute<int>;
using HasSize = concepts::SingleAttribute<int>;
// NodeImpl instantiations used as base classes by the concrete node classes.
using NodeImplPrimitive = NodeImpl<NoName, NoLeaves, NoLeafNames, NoSize>;
using NodeImplSymbolic = NodeImpl<HasName, NoLeaves, NoLeafNames, NoSize>;
using NodeImplRecord = NodeImpl<HasName, MultiLeaves, LeafNames, NoSize>;
using NodeImplEnum = NodeImpl<HasName, NoLeaves, LeafNames, NoSize>;
using NodeImplArray = NodeImpl<NoName, SingleLeaf, NoLeafNames, NoSize>;
using NodeImplMap = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoSize>;
using NodeImplUnion = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoSize>;
using NodeImplFixed = NodeImpl<HasName, NoLeaves, NoLeafNames, HasSize>;
/// Node for the types that carry no name, leaves, leaf names or size.
class AVRO_DECL NodePrimitive : public NodeImplPrimitive {
public:
    /// Creates a node of the given \p type.
    explicit NodePrimitive(Type type)
        : NodeImplPrimitive(type) {}

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Always valid: there is no attribute that could be missing.
    bool isValid() const override { return true; }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
/// Node that stands for another, named node. The target is referenced
/// through a weak pointer, so this node does not keep it alive.
class AVRO_DECL NodeSymbolic : public NodeImplSymbolic {
    using NodeWeakPtr = std::weak_ptr<Node>;

public:
    /// Creates a symbolic node with no name and no target.
    NodeSymbolic() : NodeImplSymbolic(AVRO_SYMBOLIC) {}

    /// Creates a symbolic node called \p name with no target yet.
    explicit NodeSymbolic(const HasName &name)
        : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()) {}

    /// Creates a symbolic node called \p name that refers to \p n.
    NodeSymbolic(const HasName &name, const NodePtr &n)
        : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()), actualNode_(n) {}

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when the name attribute reports exactly one entry.
    bool isValid() const override {
        return nameAttribute_.size() == 1;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;

    /// True while the target node can still be locked from the weak pointer.
    bool isSet() const {
        const NodePtr target = actualNode_.lock();
        return target != nullptr;
    }

    /// Returns the target node; throws Exception when it is not available.
    NodePtr getNode() const {
        if (NodePtr target = actualNode_.lock()) {
            return target;
        }
        throw Exception(boost::format("Could not follow symbol %1%") % name());
    }

    /// Makes this symbol refer to \p node.
    void setNode(const NodePtr &node) {
        actualNode_ = node;
    }

protected:
    NodeWeakPtr actualNode_;
};
/// Node for the avro record type: a name, a list of field nodes, the
/// matching list of field names and a list of default values.
class AVRO_DECL NodeRecord : public NodeImplRecord {
    std::vector<GenericDatum> defaultValues;

public:
    /// Creates an empty record node with no default values.
    NodeRecord() : NodeImplRecord(AVRO_RECORD) {}

    NodeRecord(const HasName &name, const MultiLeaves &fields,
               const LeafNames &fieldsNames,
               std::vector<GenericDatum> dv);

    /// Creates a record node with a doc string. Every field name is
    /// registered in the name index; a repeated name throws Exception.
    NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
               const LeafNames &fieldsNames,
               std::vector<GenericDatum> dv) : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, NoSize()),
                                               defaultValues(std::move(dv)) {
        for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
            if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
                throw Exception(boost::format(
                                    "Cannot add duplicate field: %1%")
                                % leafNameAttributes_.get(i));
            }
        }
    }

    /// Exchanges the attributes and the default values with \p r.
    void swap(NodeRecord &r) {
        NodeImplRecord::swap(r);
        defaultValues.swap(r.defaultValues);
    }

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when there is exactly one name and as many field names as fields.
    bool isValid() const override {
        return ((nameAttribute_.size() == 1) && (leafAttributes_.size() == leafNameAttributes_.size()));
    }

    /// Returns the default value stored for the field at \p index.
    /// Throws the same Exception as Node::defaultValueAt when no default
    /// value is stored at that position (for instance on a record built
    /// with the default constructor), instead of reading past the end of
    /// the vector.
    const GenericDatum &defaultValueAt(size_t index) override {
        if (index >= defaultValues.size()) {
            throw Exception(boost::format("No default value at: %1%") % index);
        }
        return defaultValues[index];
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
/// Node for the avro enum type: a name plus a list of symbols.
class AVRO_DECL NodeEnum : public NodeImplEnum {
public:
    /// Creates an enum node with no name and no symbols.
    NodeEnum() : NodeImplEnum(AVRO_ENUM) {}

    /// Creates an enum node called \p name with the given \p symbols.
    /// Every symbol is registered in the name index; a repeated symbol
    /// throws Exception.
    NodeEnum(const HasName &name, const LeafNames &symbols)
        : NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize()) {
        for (size_t pos = 0; pos < leafNameAttributes_.size(); ++pos) {
            const bool inserted = nameIndex_.add(leafNameAttributes_.get(pos), pos);
            if (inserted) {
                continue;
            }
            throw Exception(boost::format("Cannot add duplicate enum: %1%") % leafNameAttributes_.get(pos));
        }
    }

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when there is exactly one name and at least one symbol.
    bool isValid() const override {
        if (nameAttribute_.size() != 1) {
            return false;
        }
        return leafNameAttributes_.size() > 0;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
/// Node for the avro array type; its single leaf is the item type.
class AVRO_DECL NodeArray : public NodeImplArray {
public:
    /// Creates an array node with a value-initialized leaf attribute.
    NodeArray() : NodeImplArray(AVRO_ARRAY) {}

    /// Creates an array node whose leaf is taken from \p items.
    explicit NodeArray(const SingleLeaf &items)
        : NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), NoSize()) {}

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when the leaf attribute reports exactly one entry.
    bool isValid() const override {
        return leafAttributes_.size() == 1;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
/// Node of type AVRO_MAP. It holds two leaves: the key node at index 0 and
/// the value node at index 1.
class AVRO_DECL NodeMap : public NodeImplMap {
public:
/// Default constructor; defined outside this header.
NodeMap();
/// Creates a map node whose value node is \p values. An AVRO_STRING
/// primitive node is created for the key and moved in front of the value.
explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP, NoName(), MultiLeaves(values), NoLeafNames(), NoSize()) {
// need to add the key for the map too
NodePtr key(new NodePrimitive(AVRO_STRING));
doAddLeaf(key);
// key goes before value
std::swap(leafAttributes_.get(0), leafAttributes_.get(1));
}
SchemaResolution resolve(const Node &reader) const override;
void printJson(std::ostream &os, size_t depth) const override;
/// Valid when exactly two leaves (key and value) are present.
bool isValid() const override {
return (leafAttributes_.size() == 2);
}
void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
/// Node of type AVRO_UNION. Its leaves are the branches of the union; it has
/// no name, no leaf names and no size.
class AVRO_DECL NodeUnion : public NodeImplUnion {
public:
    /// Creates a union node without branches.
    NodeUnion() : NodeImplUnion(AVRO_UNION) {}

    /// Creates a union node whose branches are \p types.
    explicit NodeUnion(const MultiLeaves &types) : NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoSize()) {}

    SchemaResolution resolve(const Node &reader) const override;
    void printJson(std::ostream &os, size_t depth) const override;

    /// A union is valid when it has at least one branch, every branch has a
    /// recognised type, and no two branches map to the same key. Unnamed
    /// types are keyed by a fixed type label; named and symbolic types are
    /// keyed by their full name.
    bool isValid() const override {
        if (leafAttributes_.size() < 1) {
            return false;
        }
        std::set<std::string> usedKeys;
        for (size_t idx = 0; idx < leafAttributes_.size(); ++idx) {
            const NodePtr &branch = leafAttributes_.get(idx);
            std::string key;
            switch (branch->type()) {
                case AVRO_STRING: key = "string"; break;
                case AVRO_BYTES: key = "bytes"; break;
                case AVRO_INT: key = "int"; break;
                case AVRO_LONG: key = "long"; break;
                case AVRO_FLOAT: key = "float"; break;
                case AVRO_DOUBLE: key = "double"; break;
                case AVRO_BOOL: key = "bool"; break;
                case AVRO_NULL: key = "null"; break;
                case AVRO_ARRAY: key = "array"; break;
                case AVRO_MAP: key = "map"; break;
                case AVRO_RECORD:
                case AVRO_ENUM:
                case AVRO_UNION:
                case AVRO_FIXED:
                case AVRO_SYMBOLIC:
                    key = branch->name().fullname();
                    break;
                default:
                    return false;
            }
            // insert() reports through .second whether the key was new.
            if (!usedKeys.insert(key).second) {
                return false;
            }
        }
        return true;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
/// Node of type AVRO_FIXED. It carries a name and a size, and is constructed
/// with no leaves and no leaf names.
class AVRO_DECL NodeFixed : public NodeImplFixed {
public:
/// Creates a fixed node with no name and no size.
NodeFixed() : NodeImplFixed(AVRO_FIXED) {}
/// Creates a fixed node called \p name with the given \p size.
NodeFixed(const HasName &name, const HasSize &size) : NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), size) {}
SchemaResolution resolve(const Node &reader) const override;
void printJson(std::ostream &os, size_t depth) const override;
/// Valid when exactly one name attribute and exactly one size attribute
/// are present.
bool isValid() const override {
return (
(nameAttribute_.size() == 1) && (sizeAttribute_.size() == 1));
}
void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
/// Replaces the leaf at \p index with a NodeSymbolic that carries the name of
/// \p node and links to it.
/// \throws Exception if this node type has no leaf attribute, or if the
///         current leaf's name differs from the name of \p node.
template<class A, class B, class C, class D>
inline void
NodeImpl<A, B, C, D>::setLeafToSymbolic(size_t index, const NodePtr &node) {
    if (!B::hasAttribute) {
        throw Exception("Cannot change leaf node for nonexistent leaf");
    }

    // get() yields a const reference; the slot is overwritten in place below.
    NodePtr &slot = const_cast<NodePtr &>(leafAttributes_.get(index));
    if (slot->name() != node->name()) {
        throw Exception("Symbolic name does not match the name of the schema it references");
    }

    std::shared_ptr<NodeSymbolic> link = std::make_shared<NodeSymbolic>();
    link->setName(node->name());
    link->setNode(node);
    slot = link;
}
/// Writes a plain-text description of this node to \p os: the type, then the
/// name (if the node has one) and the size (if the node type has a size
/// attribute), followed by one entry per leaf or leaf name. Leaves are
/// printed recursively except under symbolic nodes. Compound types are
/// closed with an "end <type>" line.
template<class A, class B, class C, class D>
inline void
NodeImpl<A, B, C, D>::printBasicInfo(std::ostream &os) const {
    os << type();
    if (hasName()) {
        os << ' ' << nameAttribute_.get();
    }
    if (D::hasAttribute) {
        os << " " << sizeAttribute_.get();
    }
    os << '\n';

    // Keep the counter unsigned and full-width: storing the leaf/name count
    // in an int would narrow it and compare signed against unsigned.
    size_t count = leaves();
    if (count == 0) {
        // No leaves (e.g. an enum): iterate over the names instead.
        count = names();
    }
    for (size_t i = 0; i < count; ++i) {
        if (C::hasAttribute) {
            os << "name " << nameAt(i) << '\n';
        }
        if (type() != AVRO_SYMBOLIC && leafAttributes_.hasAttribute) {
            leafAt(i)->printBasicInfo(os);
        }
    }
    if (isCompound(type())) {
        os << "end " << type() << '\n';
    }
}
/// Returns the node that the symbolic node \p node links to.
/// \throws Exception if \p node is not of type AVRO_SYMBOLIC.
inline NodePtr resolveSymbol(const NodePtr &node) {
    if (node->type() == AVRO_SYMBOLIC) {
        auto symbolic = std::static_pointer_cast<NodeSymbolic>(node);
        return symbolic->getNode();
    }
    throw Exception("Only symbolic nodes may be resolved");
}
/// Formats \p i as a backslash-u escape: the two characters "\u" followed by
/// the value in lowercase hexadecimal, zero-padded to a minimum width of
/// sizeof(T) characters (so four digits for a 4-byte integer type).
template<typename T>
inline std::string intToHex(T i) {
    std::stringstream out;
    out << "\\u";
    // setw only affects the next formatted insertion, i.e. the number.
    out << std::setfill('0') << std::setw(sizeof(T)) << std::hex;
    out << i;
    return out.str();
}
} // namespace avro
#endif

View File

@ -1,143 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Parser_hh__
#define avro_Parser_hh__
#include "Config.hh"
#include "Reader.hh"
#include <array>
namespace avro {
///
/// Class that wraps a Reader or ValidatingReader with an interface that uses
/// explicit read* names instead of readValue. Every method forwards to the
/// wrapped reader.
///
template<class Reader>
class Parser : private boost::noncopyable {
public:
/// Builds the wrapped reader from the input buffer alone (the form intended
/// for the plain, non-validating Reader).
explicit Parser(const InputBuffer &in) : reader_(in) {}
/// Builds the wrapped reader from a schema and the input buffer (the form
/// intended for ValidatingReader).
Parser(const ValidSchema &schema, const InputBuffer &in) : reader_(schema, in) {}
/// Reads a null; nothing is returned.
void readNull() {
Null null;
reader_.readValue(null);
}
/// Reads and returns a boolean.
bool readBool() {
bool val;
reader_.readValue(val);
return val;
}
/// Reads and returns a 32-bit integer.
int32_t readInt() {
int32_t val;
reader_.readValue(val);
return val;
}
/// Reads and returns a 64-bit integer.
int64_t readLong() {
int64_t val;
reader_.readValue(val);
return val;
}
/// Reads and returns a float.
float readFloat() {
float val;
reader_.readValue(val);
return val;
}
/// Reads and returns a double.
double readDouble() {
double val;
reader_.readValue(val);
return val;
}
/// Reads a string into \p val.
void readString(std::string &val) {
reader_.readValue(val);
}
/// Reads a byte sequence into \p val.
void readBytes(std::vector<uint8_t> &val) {
reader_.readBytes(val);
}
/// Reads a fixed value of N bytes into the C array \p val.
template<size_t N>
void readFixed(uint8_t (&val)[N]) {
reader_.readFixed(val);
}
/// Reads a fixed value of N bytes into the std::array \p val.
template<size_t N>
void readFixed(std::array<uint8_t, N> &val) {
reader_.readFixed(val);
}
/// Forwards to the reader's readRecord().
void readRecord() {
reader_.readRecord();
}
/// Forwards to the reader's readRecordEnd().
void readRecordEnd() {
reader_.readRecordEnd();
}
/// Returns the value of the reader's readArrayBlockSize().
int64_t readArrayBlockSize() {
return reader_.readArrayBlockSize();
}
/// Returns the value of the reader's readUnion().
int64_t readUnion() {
return reader_.readUnion();
}
/// Returns the value of the reader's readEnum().
int64_t readEnum() {
return reader_.readEnum();
}
/// Returns the value of the reader's readMapBlockSize().
int64_t readMapBlockSize() {
return reader_.readMapBlockSize();
}
private:
// The free helper functions below need direct access to reader_; they are
// only declared for Parser<ValidatingReader>.
friend Type nextType(Parser<ValidatingReader> &p);
friend bool currentRecordName(Parser<ValidatingReader> &p, std::string &name);
friend bool nextFieldName(Parser<ValidatingReader> &p, std::string &name);
Reader reader_;
};
/// Returns the result of nextType() on the validating reader wrapped by \p p.
inline Type nextType(Parser<ValidatingReader> &p) {
return p.reader_.nextType();
}
/// Forwards to currentRecordName() on the validating reader wrapped by \p p,
/// passing \p name through as the output argument, and returns its result.
inline bool currentRecordName(Parser<ValidatingReader> &p, std::string &name) {
return p.reader_.currentRecordName(name);
}
/// Forwards to nextFieldName() on the validating reader wrapped by \p p,
/// passing \p name through as the output argument, and returns its result.
inline bool nextFieldName(Parser<ValidatingReader> &p, std::string &name) {
return p.reader_.nextFieldName(name);
}
} // namespace avro
#endif

View File

@ -1,201 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Reader_hh__
#define avro_Reader_hh__
#include <array>
#include <boost/noncopyable.hpp>
#include <cstdint>
#include <stdexcept>
#include <vector>
#include "Config.hh"
#include "Types.hh"
#include "Validator.hh"
#include "Zigzag.hh"
#include "buffer/BufferReader.hh"
namespace avro {
///
/// Parses from an avro encoding to the requested type. Assumes the next item
/// in the avro binary data is the expected type.
///
/// Every read first asks the ValidatorType policy to check the expected type
/// and then pulls the encoded bytes from the wrapped BufferReader.
///
template<class ValidatorType>
class ReaderImpl : private boost::noncopyable {
public:
    /// Creates a reader over \p buffer with a default-constructed validator.
    explicit ReaderImpl(const InputBuffer &buffer) : reader_(buffer) {}

    /// Creates a reader over \p buffer whose validator is built from \p schema.
    ReaderImpl(const ValidSchema &schema, const InputBuffer &buffer) : validator_(schema),
                                                                      reader_(buffer) {}

    /// Reads a null: only the type check is performed, no bytes are consumed.
    void readValue(Null &) {
        validator_.checkTypeExpected(AVRO_NULL);
    }

    /// Reads one byte and stores whether it is non-zero in \p val.
    void readValue(bool &val) {
        validator_.checkTypeExpected(AVRO_BOOL);
        uint8_t intVal = 0;
        reader_.read(intVal);
        val = (intVal != 0);
    }

    /// Reads a zigzag-encoded varint into the 32-bit \p val.
    void readValue(int32_t &val) {
        validator_.checkTypeExpected(AVRO_INT);
        auto encoded = static_cast<uint32_t>(readVarInt());
        val = decodeZigzag32(encoded);
    }

    /// Reads a zigzag-encoded varint into the 64-bit \p val.
    void readValue(int64_t &val) {
        validator_.checkTypeExpected(AVRO_LONG);
        uint64_t encoded = readVarInt();
        val = decodeZigzag64(encoded);
    }

    /// Reads a 32-bit value and reinterprets its bits as a float.
    void readValue(float &val) {
        validator_.checkTypeExpected(AVRO_FLOAT);
        union {
            float f;
            uint32_t i;
        } v;
        reader_.read(v.i);
        val = v.f;
    }

    /// Reads a 64-bit value and reinterprets its bits as a double.
    void readValue(double &val) {
        validator_.checkTypeExpected(AVRO_DOUBLE);
        union {
            double d;
            uint64_t i;
        } v;
        reader_.read(v.i);
        val = v.d;
    }

    /// Reads a length prefix followed by that many bytes into \p val.
    /// \throws std::runtime_error if the decoded length is negative.
    void readValue(std::string &val) {
        validator_.checkTypeExpected(AVRO_STRING);
        const size_t size = readLength();
        reader_.read(val, size);
    }

    /// Reads a length prefix followed by that many bytes into \p val.
    /// \throws std::runtime_error if the decoded length is negative.
    void readBytes(std::vector<uint8_t> &val) {
        validator_.checkTypeExpected(AVRO_BYTES);
        const size_t size = readLength();
        val.resize(size);
        reader_.read(reinterpret_cast<char *>(val.data()), size);
    }

    /// Reads \p size raw bytes into \p val after the validator has checked
    /// the expected fixed size.
    void readFixed(uint8_t *val, size_t size) {
        validator_.checkFixedSizeExpected(size);
        reader_.read(reinterpret_cast<char *>(val), size);
    }

    /// Reads N raw bytes into the C array \p val.
    template<size_t N>
    void readFixed(uint8_t (&val)[N]) {
        this->readFixed(val, N);
    }

    /// Reads N raw bytes into the std::array \p val.
    template<size_t N>
    void readFixed(std::array<uint8_t, N> &val) {
        this->readFixed(val.data(), N);
    }

    /// Marks the start of a record: no bytes are read, the validator is
    /// advanced past the record and count steps and its count is set to 1.
    void readRecord() {
        validator_.checkTypeExpected(AVRO_RECORD);
        validator_.checkTypeExpected(AVRO_LONG);
        validator_.setCount(1);
    }

    /// Marks the end of a record: no bytes are read, the validator is
    /// advanced past the record and count steps and its count is set to 0.
    void readRecordEnd() {
        validator_.checkTypeExpected(AVRO_RECORD);
        validator_.checkTypeExpected(AVRO_LONG);
        validator_.setCount(0);
    }

    /// Reads and returns the count that follows an array marker.
    int64_t readArrayBlockSize() {
        validator_.checkTypeExpected(AVRO_ARRAY);
        return readCount();
    }

    /// Reads and returns the count that follows a union marker.
    int64_t readUnion() {
        validator_.checkTypeExpected(AVRO_UNION);
        return readCount();
    }

    /// Reads and returns the count that follows an enum marker.
    int64_t readEnum() {
        validator_.checkTypeExpected(AVRO_ENUM);
        return readCount();
    }

    /// Reads and returns the count that follows a map marker.
    int64_t readMapBlockSize() {
        validator_.checkTypeExpected(AVRO_MAP);
        return readCount();
    }

    /// Returns the validator's next expected type.
    Type nextType() const {
        return validator_.nextTypeExpected();
    }

    /// Forwards to the validator's getCurrentRecordName().
    bool currentRecordName(std::string &name) const {
        return validator_.getCurrentRecordName(name);
    }

    /// Forwards to the validator's getNextFieldName().
    bool nextFieldName(std::string &name) const {
        return validator_.getNextFieldName(name);
    }

private:
    /// Reads a base-128 varint: seven payload bits per byte, least
    /// significant group first, with the high bit as continuation flag.
    /// \throws std::runtime_error if the input carries more continuation
    ///         bytes than a 64-bit value can hold.
    uint64_t readVarInt() {
        uint64_t encoded = 0;
        uint8_t val = 0;
        int shift = 0;
        do {
            // A 64-bit value needs at most ten groups (shifts 0..63).
            // Shifting a uint64_t by 64 or more is undefined behaviour, so
            // malformed input must be rejected before the shift happens.
            if (shift >= 64) {
                throw std::runtime_error("Invalid Avro varint: too many bytes");
            }
            reader_.read(val);
            uint64_t newBits = static_cast<uint64_t>(val & 0x7f) << shift;
            encoded |= newBits;
            shift += 7;
        } while (val & 0x80);
        return encoded;
    }

    /// Reads a varint and zigzag-decodes it into a signed 64-bit value.
    int64_t readSize() {
        uint64_t encoded = readVarInt();
        int64_t size = decodeZigzag64(encoded);
        return size;
    }

    /// Reads a length prefix for string/bytes data and converts it to size_t.
    /// \throws std::runtime_error if the decoded length is negative; a cast
    ///         would otherwise turn it into an enormous unsigned size.
    size_t readLength() {
        const int64_t size = readSize();
        if (size < 0) {
            throw std::runtime_error("Invalid Avro data: negative length");
        }
        return static_cast<size_t>(size);
    }

    /// Reads a count, reports it to the validator and returns it. The value
    /// is passed on unchanged, including when it is negative.
    int64_t readCount() {
        validator_.checkTypeExpected(AVRO_LONG);
        int64_t count = readSize();
        validator_.setCount(count);
        return count;
    }

    ValidatorType validator_;
    BufferReader reader_;
};
/// ReaderImpl instantiated with NullValidator as its validation policy.
using Reader = ReaderImpl<NullValidator>;
/// ReaderImpl instantiated with Validator as its validation policy.
using ValidatingReader = ReaderImpl<Validator>;
} // namespace avro
#endif

View File

@ -1,50 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Resolver_hh__
#define avro_Resolver_hh__
#include <boost/noncopyable.hpp>
#include <cstdint>
#include <memory>
#include "Config.hh"
#include "Reader.hh"
/// \file Resolver.hh
///
namespace avro {
class ValidSchema;
class Layout;
/// Abstract, non-copyable interface with a single parse operation.
class AVRO_DECL Resolver : private boost::noncopyable {
public:
/// Parses from \p reader, using \p address as the destination.
/// NOTE(review): presumably \p address points at an object laid out as
/// described by the reader Layout -- confirm against the implementation.
virtual void parse(Reader &reader, uint8_t *address) const = 0;
virtual ~Resolver() = default;
};
/// Builds a Resolver from a writer schema, a reader schema and the reader's
/// layout. The caller owns the returned object. Only declared here; the
/// definition is not in this header.
std::unique_ptr<Resolver> constructResolver(
const ValidSchema &writerSchema,
const ValidSchema &readerSchema,
const Layout &readerLayout);
} // namespace avro
#endif

View File

@ -1,50 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_ResolverSchema_hh__
#define avro_ResolverSchema_hh__
#include <boost/noncopyable.hpp>
#include <cstdint>
#include <memory>
#include "Config.hh"
#include "Reader.hh"
/// \file ResolverSchema.hh
///
namespace avro {
class ValidSchema;
class Layout;
class Resolver;
/// Holds a shared Resolver built from a writer schema, a reader schema and
/// the reader's layout. Parsing is private and reachable only through the
/// friend class ResolvingReader.
class AVRO_DECL ResolverSchema {
public:
/// Constructor; defined outside this header.
ResolverSchema(const ValidSchema &writer, const ValidSchema &reader, const Layout &readerLayout);
private:
friend class ResolvingReader;
// Parses from reader into the memory at address; defined outside this header.
void parse(Reader &reader, uint8_t *address);
// Shared, so copies of a ResolverSchema refer to the same Resolver.
std::shared_ptr<Resolver> resolver_;
};
} // namespace avro
#endif

View File

@ -1,49 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_ResolvingReader_hh__
#define avro_ResolvingReader_hh__
#include <boost/noncopyable.hpp>
#include <stdint.h>
#include "Config.hh"
#include "Reader.hh"
#include "ResolverSchema.hh"
namespace avro {
/// Non-copyable reader that combines a plain Reader over an input buffer
/// with a copy of a ResolverSchema.
class AVRO_DECL ResolvingReader : private boost::noncopyable {
public:
/// Creates the reader over \p in and stores a copy of \p schema.
ResolvingReader(const ResolverSchema &schema, const InputBuffer &in) : reader_(in),
schema_(schema) {}
/// Parses into \p object by handing its address, as a raw byte pointer,
/// to the resolver schema.
/// NOTE(review): presumably T must match the reader layout the schema
/// was built with -- confirm.
template<typename T>
void parse(T &object) {
schema_.parse(reader_, reinterpret_cast<uint8_t *>(&object));
}
private:
Reader reader_;
ResolverSchema schema_;
};
} // namespace avro
#endif

View File

@ -1,143 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Schema_hh__
#define avro_Schema_hh__
#include "Config.hh"
#include "NodeImpl.hh"
#include <string>
/// \file
///
/// Schemas for representing all the avro types. The compound schema objects
/// allow composition from other schemas.
///
namespace avro {
/// The root Schema object is a base class. Nobody constructs this class directly:
/// its constructors are protected, so only derived schema classes can create one.
class AVRO_DECL Schema {
public:
virtual ~Schema() = default;
/// Returns the type of the root node.
Type type() const {
return node_->type();
}
/// Returns the root node (read-only access).
const NodePtr &root() const {
return node_;
}
/// Returns the root node (mutable access).
NodePtr &root() {
return node_;
}
protected:
/// Takes over an existing node pointer.
explicit Schema(NodePtr node) : node_(std::move(node)) {}
/// Wraps a raw node pointer in a NodePtr, which then manages it.
explicit Schema(Node *node) : node_(node) {}
NodePtr node_;
};
/// Schema whose root is a primitive node of type AVRO_NULL.
class AVRO_DECL NullSchema : public Schema {
public:
NullSchema() : Schema(new NodePrimitive(AVRO_NULL)) {}
};
/// Schema whose root is a primitive node of type AVRO_BOOL.
class AVRO_DECL BoolSchema : public Schema {
public:
BoolSchema() : Schema(new NodePrimitive(AVRO_BOOL)) {}
};
/// Schema whose root is a primitive node of type AVRO_INT.
class AVRO_DECL IntSchema : public Schema {
public:
IntSchema() : Schema(new NodePrimitive(AVRO_INT)) {}
};
/// Schema whose root is a primitive node of type AVRO_LONG.
class AVRO_DECL LongSchema : public Schema {
public:
LongSchema() : Schema(new NodePrimitive(AVRO_LONG)) {}
};
/// Schema whose root is a primitive node of type AVRO_FLOAT.
class AVRO_DECL FloatSchema : public Schema {
public:
FloatSchema() : Schema(new NodePrimitive(AVRO_FLOAT)) {}
};
/// Schema whose root is a primitive node of type AVRO_DOUBLE.
class AVRO_DECL DoubleSchema : public Schema {
public:
DoubleSchema() : Schema(new NodePrimitive(AVRO_DOUBLE)) {}
};
/// Schema whose root is a primitive node of type AVRO_STRING.
class AVRO_DECL StringSchema : public Schema {
public:
StringSchema() : Schema(new NodePrimitive(AVRO_STRING)) {}
};
/// Schema whose root is a primitive node of type AVRO_BYTES.
class AVRO_DECL BytesSchema : public Schema {
public:
BytesSchema() : Schema(new NodePrimitive(AVRO_BYTES)) {}
};
/// Schema for a named record to which fields can be added. All members are
/// only declared here; their definitions are not in this header.
class AVRO_DECL RecordSchema : public Schema {
public:
/// Creates a record schema called \p name.
explicit RecordSchema(const std::string &name);
/// Adds a field called \p name whose type is given by \p fieldSchema.
void addField(const std::string &name, const Schema &fieldSchema);
/// Returns the record's doc string.
std::string getDoc() const;
/// Sets the record's doc string.
void setDoc(const std::string &);
};
/// Schema for a named enum to which symbols can be added. Members are only
/// declared here; their definitions are not in this header.
class AVRO_DECL EnumSchema : public Schema {
public:
/// Creates an enum schema called \p name.
explicit EnumSchema(const std::string &name);
/// Adds \p symbol to the enum.
void addSymbol(const std::string &symbol);
};
/// Schema for an array. Members are only declared here; their definitions
/// are not in this header.
class AVRO_DECL ArraySchema : public Schema {
public:
/// Creates an array schema whose items are described by \p itemsSchema.
explicit ArraySchema(const Schema &itemsSchema);
/// Constructor taking another ArraySchema. By signature this is the copy
/// constructor, although the parameter is named itemsSchema.
/// NOTE(review): confirm in the definition whether it copies the
/// argument or builds an array of arrays.
ArraySchema(const ArraySchema &itemsSchema);
};
/// Schema for a map. Members are only declared here; their definitions are
/// not in this header.
class AVRO_DECL MapSchema : public Schema {
public:
/// Creates a map schema whose values are described by \p valuesSchema.
explicit MapSchema(const Schema &valuesSchema);
/// Constructor taking another MapSchema. By signature this is the copy
/// constructor, although the parameter is named itemsSchema.
/// NOTE(review): confirm in the definition whether it copies the
/// argument or builds a map of maps.
MapSchema(const MapSchema &itemsSchema);
};
/// Schema for a union to which branch types can be added. Members are only
/// declared here; their definitions are not in this header.
class AVRO_DECL UnionSchema : public Schema {
public:
UnionSchema();
/// Adds the type described by \p typeSchema to the union.
void addType(const Schema &typeSchema);
};
/// Schema for a named fixed type. The constructor is only declared here.
class AVRO_DECL FixedSchema : public Schema {
public:
/// Creates a fixed schema called \p name with the given \p size. Note the
/// argument order: size first, then name.
FixedSchema(int size, const std::string &name);
};
/// Schema built from a name and a link to another node. The constructor is
/// only declared here.
class AVRO_DECL SymbolicSchema : public Schema {
public:
/// Creates a symbolic schema called \p name that refers to \p link.
SymbolicSchema(const Name &name, const NodePtr &link);
};
} // namespace avro
#endif

View File

@ -1,54 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_SchemaResolution_hh__
#define avro_SchemaResolution_hh__
#include "Config.hh"
namespace avro {
/// Result of resolving one schema against another.
enum SchemaResolution {
/// The schemas definitely do not match
RESOLVE_NO_MATCH,
/// The schemas match at a cursory level
///
/// For records and enums, this means the name is the same, but it does not
/// necessarily mean that every symbol or field is an exact match.
RESOLVE_MATCH,
/// For primitives, a match may occur if the type is promotable. This means that the
/// writer matches the reader if the writer's type is promoted to the specified type.
//@{
RESOLVE_PROMOTABLE_TO_LONG,
RESOLVE_PROMOTABLE_TO_FLOAT,
RESOLVE_PROMOTABLE_TO_DOUBLE,
//@}
};
} // namespace avro
#endif

View File

@ -1,127 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Serializer_hh__
#define avro_Serializer_hh__
#include <array>
#include <boost/noncopyable.hpp>
#include "Config.hh"
#include "Writer.hh"
namespace avro {
/// Class that wraps a Writer or ValidatingWriter with an interface that uses
/// explicit write* names instead of writeValue. Every method forwards to the
/// wrapped writer.
template<class Writer>
class Serializer : private boost::noncopyable {
public:
/// Constructor only works with Writer
explicit Serializer() : writer_() {}
/// Constructor only works with ValidatingWriter
explicit Serializer(const ValidSchema &schema) : writer_(schema) {}
/// Writes a null.
void writeNull() {
writer_.writeValue(Null());
}
/// Writes a boolean.
void writeBool(bool val) {
writer_.writeValue(val);
}
/// Writes a 32-bit integer.
void writeInt(int32_t val) {
writer_.writeValue(val);
}
/// Writes a 64-bit integer.
void writeLong(int64_t val) {
writer_.writeValue(val);
}
/// Writes a float.
void writeFloat(float val) {
writer_.writeValue(val);
}
/// Writes a double.
void writeDouble(double val) {
writer_.writeValue(val);
}
/// Writes \p size bytes starting at \p val as a bytes value.
void writeBytes(const void *val, size_t size) {
writer_.writeBytes(val, size);
}
/// Writes a fixed value of N bytes from the C array \p val.
template<size_t N>
void writeFixed(const uint8_t (&val)[N]) {
writer_.writeFixed(val);
}
/// Writes a fixed value of N bytes from the std::array \p val.
template<size_t N>
void writeFixed(const std::array<uint8_t, N> &val) {
writer_.writeFixed(val);
}
/// Writes a string.
void writeString(const std::string &val) {
writer_.writeValue(val);
}
/// Forwards to the writer's writeRecord().
void writeRecord() {
writer_.writeRecord();
}
/// Forwards to the writer's writeRecordEnd().
void writeRecordEnd() {
writer_.writeRecordEnd();
}
/// Forwards \p size to the writer's writeArrayBlock().
void writeArrayBlock(int64_t size) {
writer_.writeArrayBlock(size);
}
/// Forwards to the writer's writeArrayEnd().
void writeArrayEnd() {
writer_.writeArrayEnd();
}
/// Forwards \p size to the writer's writeMapBlock().
void writeMapBlock(int64_t size) {
writer_.writeMapBlock(size);
}
/// Forwards to the writer's writeMapEnd().
void writeMapEnd() {
writer_.writeMapEnd();
}
/// Forwards \p choice to the writer's writeUnion().
void writeUnion(int64_t choice) {
writer_.writeUnion(choice);
}
/// Forwards \p choice to the writer's writeEnum().
void writeEnum(int64_t choice) {
writer_.writeEnum(choice);
}
/// Returns the writer's buffer() as an InputBuffer.
InputBuffer buffer() const {
return writer_.buffer();
}
private:
Writer writer_;
};
} // namespace avro
#endif

View File

@ -1,357 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Codec_hh__
#define avro_Codec_hh__
#include "array"
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "boost/blank.hpp"
#include "AvroTraits.hh"
#include "Config.hh"
#include "Decoder.hh"
#include "Encoder.hh"
/**
* A bunch of templates and specializations for encoding and decoding
* specific types.
*
* Primitive AVRO types BOOLEAN, INT, LONG, FLOAT, DOUBLE, STRING and BYTES
* get decoded to and encoded from C++ types bool, int32_t, int64_t, float,
* double, std::string and std::vector<uint8_t> respectively. In addition,
* std::vector<T> for arbitrary type T gets encoded as an Avro array of T.
* Similarly, std::map<std::string, T> for arbitrary type T gets encoded
* as an Avro map with value type T.
*
* Users can have their custom types encoded/decoded by specializing
* avro::codec_traits class for their types.
*/
namespace avro {
typedef boost::blank null;
template<typename T>
void encode(Encoder &e, const T &t);
template<typename T>
void decode(Decoder &d, T &t);
/**
* Codec_traits tells avro how to encode and decode an object of given type.
*
* The class is expected to have two static methods:
* \li static void encode(Encoder& e, const T& value);
* \li static void decode(Decoder& e, T& value);
* The default is empty.
*/
template<typename T>
struct codec_traits;
/**
* codec_traits for Avro boolean, mapped to the C++ type bool.
*/
template<>
struct codec_traits<bool> {
/**
* Encodes a given value through Encoder::encodeBool().
*/
static void encode(Encoder &e, bool b) {
e.encodeBool(b);
}
/**
* Decodes into a given value from Decoder::decodeBool().
*/
static void decode(Decoder &d, bool &b) {
b = d.decodeBool();
}
};
/**
* codec_traits for Avro int, mapped to the C++ type int32_t.
*/
template<>
struct codec_traits<int32_t> {
/**
* Encodes a given value through Encoder::encodeInt().
*/
static void encode(Encoder &e, int32_t i) {
e.encodeInt(i);
}
/**
* Decodes into a given value from Decoder::decodeInt().
*/
static void decode(Decoder &d, int32_t &i) {
i = d.decodeInt();
}
};
/**
* codec_traits for Avro long, mapped to the C++ type int64_t.
*/
template<>
struct codec_traits<int64_t> {
/**
* Encodes a given value through Encoder::encodeLong().
*/
static void encode(Encoder &e, int64_t l) {
e.encodeLong(l);
}
/**
* Decodes into a given value from Decoder::decodeLong().
*/
static void decode(Decoder &d, int64_t &l) {
l = d.decodeLong();
}
};
/**
* codec_traits for Avro float, mapped to the C++ type float.
*/
template<>
struct codec_traits<float> {
/**
* Encodes a given value through Encoder::encodeFloat().
*/
static void encode(Encoder &e, float f) {
e.encodeFloat(f);
}
/**
* Decodes into a given value from Decoder::decodeFloat().
*/
static void decode(Decoder &d, float &f) {
f = d.decodeFloat();
}
};
/**
* codec_traits for Avro double, mapped to the C++ type double.
*/
template<>
struct codec_traits<double> {
/**
* Encodes a given value through Encoder::encodeDouble().
*/
static void encode(Encoder &e, double d) {
e.encodeDouble(d);
}
/**
* Decodes into a given value from Decoder::decodeDouble().
*/
static void decode(Decoder &d, double &dbl) {
dbl = d.decodeDouble();
}
};
/**
* codec_traits for Avro string, mapped to the C++ type std::string.
*/
template<>
struct codec_traits<std::string> {
/**
* Encodes a given value through Encoder::encodeString().
*/
static void encode(Encoder &e, const std::string &s) {
e.encodeString(s);
}
/**
* Decodes into a given value; the previous content of s is replaced by
* the result of Decoder::decodeString().
*/
static void decode(Decoder &d, std::string &s) {
s = d.decodeString();
}
};
/**
* codec_traits for Avro bytes, mapped to the C++ type std::vector<uint8_t>.
* Being a full specialization, it is used instead of the generic
* std::vector<T> array codec for this element type.
*/
template<>
struct codec_traits<std::vector<uint8_t>> {
/**
* Encodes a given value through Encoder::encodeBytes().
*/
static void encode(Encoder &e, const std::vector<uint8_t> &b) {
e.encodeBytes(b);
}
/**
* Decodes into a given value through Decoder::decodeBytes().
*/
static void decode(Decoder &d, std::vector<uint8_t> &s) {
d.decodeBytes(s);
}
};
/**
 * codec_traits for Avro fixed, mapped to std::array<uint8_t, N>.
 */
template<size_t N>
struct codec_traits<std::array<uint8_t, N>> {
    /**
     * Encodes the N bytes of the array as a fixed value.
     */
    static void encode(Encoder &e, const std::array<uint8_t, N> &b) {
        e.encodeFixed(b.data(), N);
    }

    /**
     * Decodes N bytes into the array. The decoder fills a temporary
     * vector, whose first N bytes are then copied over.
     */
    static void decode(Decoder &d, std::array<uint8_t, N> &s) {
        std::vector<uint8_t> scratch(N);
        d.decodeFixed(N, scratch);
        std::copy_n(scratch.data(), N, s.data());
    }
};
/**
 * codec_traits for Avro arrays, mapped to std::vector<T>.
 */
template<typename T>
struct codec_traits<std::vector<T>> {
    /**
     * Encodes the vector as one array: a non-empty vector is written as a
     * single block holding all items, an empty one as just start and end.
     */
    static void encode(Encoder &e, const std::vector<T> &b) {
        e.arrayStart();
        if (!b.empty()) {
            e.setItemCount(b.size());
            // const auto & keeps the element type exactly as the const
            // iterator yields it, so the same encode<> overload is picked.
            for (const auto &item : b) {
                e.startItem();
                avro::encode(e, item);
            }
        }
        e.arrayEnd();
    }

    /**
     * Clears the vector and then appends every item of every block the
     * decoder reports, until a block of size zero ends the array.
     */
    static void decode(Decoder &d, std::vector<T> &s) {
        s.clear();
        size_t blockSize = d.arrayStart();
        while (blockSize != 0) {
            for (size_t k = 0; k < blockSize; ++k) {
                T item;
                avro::decode(d, item);
                s.push_back(std::move(item));
            }
            blockSize = d.arrayNext();
        }
    }
};
// Encoding support for the element reference type of std::vector<bool>.
// std::conditional selects std::vector<bool>::const_reference as the
// specialized type when is_not_defined<bool_codec_traits> is true, and void
// otherwise. Only encode() is provided; there is no decode().
// NOTE(review): presumably this covers standard libraries where
// const_reference is a proxy class rather than plain bool, and avoids
// redefining codec_traits<bool> where it is bool -- confirm.
typedef codec_traits<std::vector<bool>::const_reference> bool_codec_traits;
template<>
struct codec_traits<std::conditional<avro::is_not_defined<bool_codec_traits>::value,
std::vector<bool>::const_reference, void>::type> {
/**
* Encodes a given value through Encoder::encodeBool().
*/
static void encode(Encoder &e, std::vector<bool>::const_reference b) {
e.encodeBool(b);
}
};
/**
 * codec_traits for Avro maps, mapped to std::map<std::string, T>.
 */
template<typename T>
struct codec_traits<std::map<std::string, T>> {
    /**
     * Encodes the map: a non-empty map is written as a single block in
     * which every entry is its key followed by its value; an empty map is
     * written as just start and end.
     */
    static void encode(Encoder &e, const std::map<std::string, T> &b) {
        e.mapStart();
        if (!b.empty()) {
            e.setItemCount(b.size());
            for (const auto &entry : b) {
                e.startItem();
                avro::encode(e, entry.first);
                avro::encode(e, entry.second);
            }
        }
        e.mapEnd();
    }

    /**
     * Clears the map and then reads every entry of every block the decoder
     * reports, until a block of size zero ends the map. A key that occurs
     * more than once keeps a single entry, decoded into again.
     */
    static void decode(Decoder &d, std::map<std::string, T> &s) {
        s.clear();
        size_t blockSize = d.mapStart();
        while (blockSize != 0) {
            for (size_t k = 0; k < blockSize; ++k) {
                std::string key;
                avro::decode(d, key);
                // operator[] creates the entry if needed; decode in place.
                T &slot = s[std::move(key)];
                avro::decode(d, slot);
            }
            blockSize = d.mapNext();
        }
    }
};
/**
* codec_traits for Avro null, mapped to avro::null (a typedef of boost::blank).
*/
template<>
struct codec_traits<avro::null> {
/**
* Encodes a null through Encoder::encodeNull(); the argument is unused.
*/
static void encode(Encoder &e, const avro::null &) {
e.encodeNull();
}
/**
* Decodes a null through Decoder::decodeNull(); the argument is unused.
*/
static void decode(Decoder &d, avro::null &) {
d.decodeNull();
}
};
/**
* Generic encoder function that makes use of the codec_traits.
* It dispatches to codec_traits<T>::encode, so T needs a codec_traits
* specialization that provides encode().
*/
template<typename T>
void encode(Encoder &e, const T &t) {
codec_traits<T>::encode(e, t);
}
/**
* Generic decoder function that makes use of the codec_traits.
* It dispatches to codec_traits<T>::decode, so T needs a codec_traits
* specialization that provides decode().
*/
template<typename T>
void decode(Decoder &d, T &t) {
codec_traits<T>::decode(d, t);
}
} // namespace avro
#endif // avro_Codec_hh__

View File

@ -1,476 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Stream_hh__
#define avro_Stream_hh__
#include <cstdint>
#include <cstring>
#include <memory>
#include "boost/utility.hpp"
#include "Config.hh"
#include "Exception.hh"
namespace avro {
/**
* A no-copy input stream. This is an abstract interface: every operation
* is pure virtual.
*/
class AVRO_DECL InputStream : boost::noncopyable {
protected:
/**
* An empty constructor.
*/
InputStream() = default;
public:
/**
* Destructor.
*/
virtual ~InputStream() = default;
/**
* Returns some of the available data.
*
* Returns true if some data is available, false if no more data is
* available or an error has occurred.
* NOTE(review): presumably on success *data points at the bytes and *len
* holds how many are available there -- confirm with an implementation.
*/
virtual bool next(const uint8_t **data, size_t *len) = 0;
/**
* "Returns" back some of the data to the stream. The returned
* data must be less than what was obtained in the last call to
* next().
*/
virtual void backup(size_t len) = 0;
/**
* Skips number of bytes specified by len.
*/
virtual void skip(size_t len) = 0;
/**
* Returns the number of bytes read from this stream so far.
* All the bytes made available through next() are considered
* to be used unless returned back using backup().
*/
virtual size_t byteCount() const = 0;
};
typedef std::unique_ptr<InputStream> InputStreamPtr;
/**
* An InputStream which also supports seeking to a specific offset.
* It adds the pure virtual seek() to the InputStream interface.
*/
class AVRO_DECL SeekableInputStream : public InputStream {
protected:
/**
* An empty constructor.
*/
SeekableInputStream() = default;
public:
/**
* Destructor.
*/
~SeekableInputStream() override = default;
/**
* Seeks to a specific position in the stream. This may invalidate pointers
* returned from next(). This will also reset byteCount() to the given
* position.
*/
virtual void seek(int64_t position) = 0;
};
typedef std::unique_ptr<SeekableInputStream> SeekableInputStreamPtr;
/**
 * A no-copy output stream.
 *
 * Abstract interface: the stream hands out writable buffers (see next())
 * which the caller fills in place.
 */
class AVRO_DECL OutputStream : boost::noncopyable {
protected:
/**
 * An empty constructor. Protected: only concrete subclasses can be
 * instantiated.
 */
OutputStream() = default;
public:
/**
 * Destructor. Virtual so that streams can be destroyed through an
 * OutputStream pointer (e.g. OutputStreamPtr).
 */
virtual ~OutputStream() = default;
/**
 * Returns a buffer that can be written into.
 * On successful return, \p data has the pointer to the buffer
 * and \p len has the number of bytes available at \p data.
 */
virtual bool next(uint8_t **data, size_t *len) = 0;
/**
 * "Returns" back to the stream some of the buffer obtained
 * from the last call to next().
 *
 * \param len Number of unused bytes to give back.
 */
virtual void backup(size_t len) = 0;
/**
 * Number of bytes written so far into this stream. The whole buffer
 * returned by next() is assumed to be written unless some of
 * it was returned using backup().
 */
virtual uint64_t byteCount() const = 0;
/**
 * Flushes any data remaining in the buffer to the stream's underlying
 * store, if any.
 */
virtual void flush() = 0;
};
/** Owning pointer to an OutputStream. */
typedef std::unique_ptr<OutputStream> OutputStreamPtr;
/**
 * Returns a new OutputStream, which grows in memory chunks of the specified
 * size.
 *
 * \param chunkSize Size of each chunk (defaults to 4 KiB).
 */
AVRO_DECL OutputStreamPtr memoryOutputStream(size_t chunkSize = 4 * 1024);
/**
 * Returns a new InputStream, with the data from the given byte array.
 * It does not copy the data, so the byte array must remain valid
 * for as long as the InputStream is used.
 *
 * \param data Start of the byte array.
 * \param len Number of bytes at \p data.
 */
AVRO_DECL InputStreamPtr memoryInputStream(const uint8_t *data, size_t len);
/**
 * Returns a new InputStream with the contents written into an
 * OutputStream. The output stream must have been returned by
 * an earlier call to memoryOutputStream(). The contents of the new
 * InputStream are a snapshot of the output stream. One can construct
 * any number of memory input streams from a single memory output stream.
 */
AVRO_DECL InputStreamPtr memoryInputStream(const OutputStream &source);
/**
 * Returns the contents written so far into the output stream, which should
 * be a memory output stream. That is, it must have been returned by a previous
 * call to memoryOutputStream().
 *
 * NOTE(review): std::vector is used here, but <vector> is not among the
 * includes visible at the top of this header - presumably it arrives
 * transitively; confirm.
 */
AVRO_DECL std::shared_ptr<std::vector<uint8_t>> snapshot(const OutputStream &source);
/**
 * Returns a new OutputStream whose contents would be stored in a file.
 * Data is written in chunks of the given buffer size.
 *
 * If there is a file with the given name, it is truncated and overwritten.
 * If there is no file with the given name, it is created.
 *
 * \param filename Name of the file to write.
 * \param bufferSize Chunk size used for writing (defaults to 8 KiB).
 */
AVRO_DECL OutputStreamPtr fileOutputStream(const char *filename,
size_t bufferSize = 8 * 1024);
/**
 * Returns a new InputStream whose contents come from the given file.
 * Data is read in chunks of the given buffer size.
 *
 * \param filename Name of the file to read.
 * \param bufferSize Chunk size used for reading (defaults to 8 KiB).
 */
AVRO_DECL InputStreamPtr fileInputStream(
const char *filename, size_t bufferSize = 8 * 1024);
/**
 * Takes the same parameters as fileInputStream() but returns a
 * SeekableInputStreamPtr, i.e. a stream that additionally offers seek().
 */
AVRO_DECL SeekableInputStreamPtr fileSeekableInputStream(
const char *filename, size_t bufferSize = 8 * 1024);
/**
 * Returns a new OutputStream whose contents will be sent to the given
 * std::ostream. The std::ostream object should outlive the returned
 * OutputStream.
 */
AVRO_DECL OutputStreamPtr ostreamOutputStream(std::ostream &os,
size_t bufferSize = 8 * 1024);
/**
 * Returns a new InputStream whose contents come from the given
 * std::istream. The std::istream object should outlive the returned
 * InputStream.
 */
AVRO_DECL InputStreamPtr istreamInputStream(
std::istream &in, size_t bufferSize = 8 * 1024);
/**
 * Returns a new InputStream whose contents come from the given
 * std::istream. Use this instead of istreamInputStream() if
 * the istream does not support seekg (e.g. compressed streams).
 * The returned InputStream reads off bytes instead of seeking.
 * Of course, reading instead of seeking has a performance penalty,
 * so use this only when seekg does not work.
 * The std::istream object should outlive the returned
 * InputStream.
 */
AVRO_DECL InputStreamPtr nonSeekableIstreamInputStream(
std::istream &is, size_t bufferSize = 8 * 1024);
/**
 * A convenience helper that consumes an InputStream one chunk at a time
 * and exposes byte-oriented read operations on top of it.
 */
struct StreamReader {
    /** The stream the bytes are pulled from. */
    InputStream *in_;
    /** Next unread byte of the current chunk. */
    const uint8_t *next_;
    /** One past the last valid byte of the current chunk. */
    const uint8_t *end_;

    /** Creates a reader that is not attached to any stream. */
    StreamReader() : in_(nullptr), next_(nullptr), end_(nullptr) {}

    /** Creates a reader attached to \p in. */
    explicit StreamReader(InputStream &in) : in_(nullptr), next_(nullptr), end_(nullptr) {
        reset(in);
    }

    /**
     * Attaches the reader to \p is. Unread bytes of the current chunk are
     * first handed back to the previously attached stream, if there is one.
     */
    void reset(InputStream &is) {
        const bool hasUnread = (end_ != next_);
        if (hasUnread && in_ != nullptr) {
            in_->backup(end_ - next_);
        }
        in_ = &is;
        end_ = nullptr;
        next_ = nullptr;
    }

    /**
     * Reads a single byte. Throws an exception when the stream has no
     * more data.
     */
    uint8_t read() {
        if (next_ == end_) {
            more();
        }
        const uint8_t value = *next_;
        ++next_;
        return value;
    }

    /**
     * Copies exactly \p n bytes into \p b, fetching further chunks as
     * needed. Throws an exception if the stream ends first.
     */
    void readBytes(uint8_t *b, size_t n) {
        while (n != 0) {
            if (next_ == end_) {
                more();
            }
            const size_t available = end_ - next_;
            const size_t chunk = (available > n) ? n : available;
            ::memcpy(b, next_, chunk);
            n -= chunk;
            b += chunk;
            next_ += chunk;
        }
    }

    /**
     * Skips \p n bytes. Bytes already buffered are dropped locally; the
     * remainder, if any, is skipped on the underlying stream.
     */
    void skipBytes(size_t n) {
        const size_t buffered = static_cast<size_t>(end_ - next_);
        if (n <= buffered) {
            next_ += n;
            return;
        }
        next_ = end_;
        in_->skip(n - buffered);
    }

    /**
     * Fetches the next non-empty chunk from the underlying stream.
     * \return true if a chunk was obtained, false if the stream has no
     * more data.
     */
    bool fill() {
        for (size_t len = 0; in_->next(&next_, &len);) {
            if (len == 0) {
                continue; // empty chunk, ask again
            }
            end_ = next_ + len;
            return true;
        }
        return false;
    }

    /**
     * Fetches more data; throws an exception if none is available.
     */
    void more() {
        if (fill()) {
            return;
        }
        throw Exception("EOF reached");
    }

    /**
     * Returns true if and only if the end of the stream has not been reached.
     */
    bool hasMore() {
        if (next_ != end_) {
            return true;
        }
        return fill();
    }

    /**
     * Hands the unused bytes of the current chunk back to the underlying
     * stream. If \p unRead is true the last byte read is pushed back too.
     */
    void drain(bool unRead) {
        if (unRead) {
            next_ -= 1;
        }
        in_->backup(end_ - next_);
        end_ = next_;
    }
};
/**
 * A convenience helper that writes bytes into the buffers handed out by an
 * OutputStream.
 */
struct StreamWriter {
    /** The stream the bytes are written to. */
    OutputStream *out_;
    /** Next writable location in the current buffer. */
    uint8_t *next_;
    /** One past the last writable location in the current buffer. */
    uint8_t *end_;

    /** Creates a writer that is not attached to any stream. */
    StreamWriter() : out_(nullptr), next_(nullptr), end_(nullptr) {}

    /** Creates a writer attached to \p out. */
    explicit StreamWriter(OutputStream &out) : out_(nullptr), next_(nullptr), end_(nullptr) {
        reset(out);
    }

    /**
     * Attaches the writer to \p os. Unused space of the current buffer is
     * first handed back to the previously attached stream, if there is one.
     */
    void reset(OutputStream &os) {
        const bool hasUnused = (end_ != next_);
        if (hasUnused && out_ != nullptr) {
            out_->backup(end_ - next_);
        }
        out_ = &os;
        // Marks the buffer as exhausted so the next write fetches a fresh one.
        next_ = end_;
    }

    /**
     * Writes a single byte.
     */
    void write(uint8_t c) {
        if (next_ == end_) {
            more();
        }
        *next_ = c;
        ++next_;
    }

    /**
     * Writes \p n bytes starting at \p b, fetching further buffers as needed.
     */
    void writeBytes(const uint8_t *b, size_t n) {
        while (n != 0) {
            if (next_ == end_) {
                more();
            }
            const size_t room = end_ - next_;
            const size_t chunk = (room > n) ? n : room;
            ::memcpy(next_, b, chunk);
            n -= chunk;
            b += chunk;
            next_ += chunk;
        }
    }

    /**
     * Hands the unused part of the current buffer back to the underlying
     * stream and then flushes that stream.
     */
    void flush() {
        if (next_ != end_) {
            out_->backup(end_ - next_);
            next_ = end_;
        }
        out_->flush();
    }

    /**
     * Returns the byte count reported by the underlying stream. For a
     * meaningful result, call this after a flush().
     */
    int64_t byteCount() const {
        return static_cast<int64_t>(out_->byteCount());
    }

    /**
     * Obtains the next non-empty buffer to write into. Throws an exception
     * if the stream cannot provide one.
     */
    void more() {
        for (size_t len = 0; out_->next(&next_, &len);) {
            if (len == 0) {
                continue; // empty buffer, ask again
            }
            end_ = next_ + len;
            return;
        }
        throw Exception("EOF reached");
    }
};
/**
 * Copies everything that \p in still has to offer into \p out and
 * flushes \p out afterwards.
 */
inline void copy(InputStream &in, OutputStream &out) {
    StreamWriter writer(out);
    const uint8_t *chunk = nullptr;
    for (size_t len = 0; in.next(&chunk, &len);) {
        writer.writeBytes(chunk, len);
    }
    writer.flush();
}
} // namespace avro
#endif

View File

@ -1,112 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Types_hh__
#define avro_Types_hh__
#include <iostream>
#include "Config.hh"
namespace avro {
/**
 * The "type" for the schema.
 *
 * The enumerators AVRO_STRING .. AVRO_NULL are the primitive types and
 * AVRO_RECORD .. AVRO_FIXED the compound ones; the range checks in
 * isPrimitive(), isCompound() and isAvroType() rely on this ordering.
 */
enum Type {
AVRO_STRING, /*!< String */
AVRO_BYTES, /*!< Sequence of variable length bytes data */
AVRO_INT, /*!< 32-bit integer */
AVRO_LONG, /*!< 64-bit integer */
AVRO_FLOAT, /*!< Floating point number */
AVRO_DOUBLE, /*!< Double precision floating point number */
AVRO_BOOL, /*!< Boolean value */
AVRO_NULL, /*!< Null */
AVRO_RECORD, /*!< Record, a sequence of fields */
AVRO_ENUM, /*!< Enumeration */
AVRO_ARRAY, /*!< Homogeneous array of some specific type */
AVRO_MAP, /*!< Homogeneous map from string to some specific type */
AVRO_UNION, /*!< Union of one or more types */
AVRO_FIXED, /*!< Fixed number of bytes */
AVRO_NUM_TYPES, /*!< Marker: number of real types, not a type itself */
// The following is a pseudo-type used in the implementation
AVRO_SYMBOLIC = AVRO_NUM_TYPES, /*!< Used internally to avoid circular references. */
AVRO_UNKNOWN = -1 /*!< Used internally. */
};
/**
 * Tells whether \p t is one of the primitive types, i.e. string, bytes,
 * int, long, float, double, boolean or null.
 */
inline constexpr bool isPrimitive(Type t) noexcept {
    return !(t < AVRO_STRING || t >= AVRO_RECORD);
}
/**
 * Tells whether \p t is a valid non-primitive type, i.e. one of the
 * enumerators from AVRO_RECORD up to, but excluding, AVRO_NUM_TYPES
 * (record, enum, array, map, union or fixed).
 */
inline constexpr bool isCompound(Type t) noexcept {
    return !(t < AVRO_RECORD || t >= AVRO_NUM_TYPES);
}
/**
 * Tells whether \p t is a valid avro type, primitive or compound.
 * The pseudo-types AVRO_SYMBOLIC and AVRO_UNKNOWN do not qualify.
 */
inline constexpr bool isAvroType(Type t) noexcept {
    return !(t < AVRO_STRING || t >= AVRO_NUM_TYPES);
}
/**
 * Tells whether \p t lies within the valid range of the enumeration,
 * which includes the pseudo-type AVRO_SYMBOLIC (== AVRO_NUM_TYPES) but
 * not AVRO_UNKNOWN.
 */
inline constexpr bool isAvroTypeOrPseudoType(Type t) noexcept {
    return !(t < AVRO_STRING || t > AVRO_NUM_TYPES);
}
/**
 * Converts the given type into a string. Useful for generating messages.
 *
 * \param type The type to describe.
 * \return A reference to the string form of \p type.
 */
AVRO_DECL const std::string &toString(Type type) noexcept;
/**
 * Writes a string form of the given type into the given ostream.
 *
 * \param os The ostream to write to.
 * \param type The type to write.
 * \return The ostream, to allow chaining (presumably \p os - the
 * definition is not in this header).
 */
AVRO_DECL std::ostream &operator<<(std::ostream &os, avro::Type type);
/// Defines a type to represent Avro Null in template functions.
struct AVRO_DECL Null {};
/**
 * Writes schema for null \p null type to \p os.
 * \param os The ostream to write to.
 * \param null The value to be written.
 *
 * NOTE(review): unlike the declarations above, this one is not marked
 * AVRO_DECL - confirm whether that is intentional.
 */
std::ostream &operator<<(std::ostream &os, const Null &null);
} // namespace avro
#endif

View File

@ -1,66 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_ValidSchema_hh__
#define avro_ValidSchema_hh__
#include "Config.hh"
#include "Node.hh"
namespace avro {
// Forward declaration; only references to Schema are used in this header.
class AVRO_DECL Schema;
/// A ValidSchema is basically a non-mutable Schema that has passed some
/// minimum of sanity checks. Once validated, any Schema that is part of
/// this ValidSchema is considered locked, and cannot be modified (an attempt
/// to modify a locked Schema will throw). Also, as it is validated, any
/// recursive duplications of schemas are replaced with symbolic links to the
/// original.
///
/// Once a Schema is converted to a valid schema it can be used in validating
/// parsers/serializers, converted to a json schema, etc.
///
class AVRO_DECL ValidSchema {
public:
/// Constructs from the root node of a schema tree (defined out of line).
explicit ValidSchema(NodePtr root);
/// Constructs from a Schema (defined out of line).
explicit ValidSchema(const Schema &schema);
/// Default constructor (defined out of line).
ValidSchema();
/// Replaces the schema held by this object (defined out of line).
void setSchema(const Schema &schema);
/// Returns the root node of the schema tree.
const NodePtr &root() const {
return root_;
}
/// Writes to \p os; going by the name, the output is the JSON form of the
/// schema - the definition is not in this header.
void toJson(std::ostream &os) const;
/// Returns a string; going by the name, the JSON form of the schema.
/// \param prettyPrint Defaults to true; presumably selects between a
/// formatted and a compact rendering - confirm against the definition.
std::string toJson(bool prettyPrint = true) const;
/// Writes to \p os; going by the name, a flat listing of the schema's
/// nodes - confirm against the definition.
void toFlatList(std::ostream &os) const;
protected:
/// Root node of the schema tree, exposed through root().
NodePtr root_;
private:
/// Helper taking and returning a schema string; presumably strips
/// formatting from a JSON schema - confirm against the definition.
static std::string compactSchema(const std::string &schema);
};
} // namespace avro
#endif

View File

@ -1,150 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Validating_hh__
#define avro_Validating_hh__
#include <boost/noncopyable.hpp>
#include <cstdint>
#include <utility>
#include <vector>
#include "Config.hh"
#include "Types.hh"
#include "ValidSchema.hh"
namespace avro {
/// A validator that validates nothing: every check succeeds and every
/// query returns a fixed answer. It offers the same member functions as
/// Validator so that either can be used as a validator policy.
class AVRO_DECL NullValidator : private boost::noncopyable {
public:
    NullValidator() = default;

    /// The schema is accepted for interface compatibility and ignored.
    explicit NullValidator(const ValidSchema &) {}

    /// Ignores the count.
    void setCount(int64_t) {}

    /// Every type is acceptable.
    static bool typeIsExpected(Type) {
        return true;
    }

    /// No particular type is expected.
    static Type nextTypeExpected() {
        return AVRO_UNKNOWN;
    }

    /// No particular size is expected.
    static int nextSizeExpected() {
        return 0;
    }

    /// Always reports success; the argument is left untouched.
    static bool getCurrentRecordName(std::string &) {
        return true;
    }

    /// Always reports success; the argument is left untouched.
    static bool getNextFieldName(std::string &) {
        return true;
    }

    /// Accepts any type.
    void checkTypeExpected(Type) {}

    /// Accepts any size.
    void checkFixedSizeExpected(int) {}
};
/// This class is used by both the ValidatingSerializer and ValidationParser
/// objects. It advances the parse tree (containing logic how to advance
/// through the various compound types, for example a record must advance
/// through all leaf nodes but a union only skips to one), and reports which
/// type is next.
class AVRO_DECL Validator : private boost::noncopyable {
public:
    /// Constructs a validator for \p schema (defined out of line).
    explicit Validator(ValidSchema schema);

    /// Supplies a count to the validator (defined out of line).
    void setCount(int64_t val);

    /// Returns true if \p type is among the types currently expected.
    /// Types outside the representable flag range (e.g. AVRO_UNKNOWN)
    /// are never expected.
    bool typeIsExpected(Type type) const {
        return (expectedTypesFlag_ & typeToFlag(type)) != 0;
    }

    /// Returns the type that is expected next.
    Type nextTypeExpected() const {
        return nextType_;
    }

    /// Returns the size expected next (defined out of line).
    int nextSizeExpected() const;

    /// Defined out of line; reports through the return value whether
    /// \p name was filled in - confirm against the definition.
    bool getCurrentRecordName(std::string &name) const;

    /// Defined out of line; reports through the return value whether
    /// \p name was filled in - confirm against the definition.
    bool getNextFieldName(std::string &name) const;

    /// Verifies that \p type is expected and advances to the next
    /// position in the schema.
    /// \throws Exception if \p type is not expected.
    void checkTypeExpected(Type type) {
        if (!typeIsExpected(type)) {
            throw Exception(
                boost::format("Type %1% does not match schema %2%")
                % type % nextType_);
        }
        advance();
    }

    /// Verifies that a fixed of \p size bytes is expected and advances.
    /// \throws Exception if the size or the type does not match.
    void checkFixedSizeExpected(int size) {
        const int expected = nextSizeExpected();
        if (expected != size) {
            throw Exception(
                boost::format("Wrong size for fixed, got %1%, expected %2%")
                % size % expected);
        }
        checkTypeExpected(AVRO_FIXED);
    }

private:
    using flag_t = uint32_t;

    /// Maps a type onto its bit in a flag_t mask.
    ///
    /// The shift is done on an unsigned operand of the flag's own width,
    /// and a type whose value is negative (AVRO_UNKNOWN) or does not fit
    /// into the mask yields an empty mask instead of an undefined shift.
    static flag_t typeToFlag(Type type) {
        const int bit = static_cast<int>(type);
        if (bit < 0 || bit >= static_cast<int>(sizeof(flag_t) * 8)) {
            return 0;
        }
        return static_cast<flag_t>(1) << bit;
    }

    // Traversal helpers, all defined out of line.
    void setupOperation(const NodePtr &node);
    void setWaitingForCount();
    void advance();
    void doAdvance();
    void enumAdvance();
    bool countingSetup();
    void countingAdvance();
    void unionAdvance();
    void fixedAdvance();
    void setupFlag(Type type);

    const ValidSchema schema_;
    Type nextType_;            ///< returned by nextTypeExpected()
    flag_t expectedTypesFlag_; ///< mask of the types accepted next
    bool compoundStarted_;
    bool waitingForCount_;
    int64_t count_;

    struct CompoundType {
        explicit CompoundType(NodePtr n) : node(std::move(n)), pos(0) {}
        NodePtr node; ///< save the node
        size_t pos;   ///< track the leaf position to visit
    };

    std::vector<CompoundType> compoundStack_;
    std::vector<size_t> counters_;
};
} // namespace avro
#endif

View File

@ -1,182 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Writer_hh__
#define avro_Writer_hh__
#include <array>
#include <boost/noncopyable.hpp>
#include "Config.hh"
#include "Types.hh"
#include "Validator.hh"
#include "Zigzag.hh"
#include "buffer/Buffer.hh"
namespace avro {
/// Serializes values into an internal OutputBuffer, consulting a validator
/// before every write.
///
/// \tparam ValidatorType The validator policy: NullValidator accepts
/// everything, Validator checks each write against a schema.
template<class ValidatorType>
class WriterImpl : private boost::noncopyable {
public:
    WriterImpl() = default;

    /// Creates a writer whose validator is constructed from \p schema.
    explicit WriterImpl(const ValidSchema &schema) : validator_(schema) {}

    /// Null carries no payload; only the validator is advanced.
    void writeValue(const Null &) {
        validator_.checkTypeExpected(AVRO_NULL);
    }

    /// Writes a boolean as one byte holding 0 or 1.
    void writeValue(bool val) {
        validator_.checkTypeExpected(AVRO_BOOL);
        const int8_t encoded = val ? 1 : 0;
        buffer_.writeTo(encoded);
    }

    /// Writes a 32-bit integer using encodeInt32().
    void writeValue(int32_t val) {
        validator_.checkTypeExpected(AVRO_INT);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
        std::array<uint8_t, 5> encoded;
        const size_t used = encodeInt32(val, encoded);
        buffer_.writeTo(reinterpret_cast<const char *>(encoded.data()), used);
    }

    /// Writes a 64-bit integer using encodeInt64().
    void writeValue(int64_t val) {
        validator_.checkTypeExpected(AVRO_LONG);
        putLong(val);
    }

    /// Writes the bit pattern of a float as a 32-bit integer.
    void writeValue(float val) {
        validator_.checkTypeExpected(AVRO_FLOAT);
        union {
            float asFloat;
            int32_t asBits;
        } pun;
        pun.asFloat = val;
        buffer_.writeTo(pun.asBits);
    }

    /// Writes the bit pattern of a double as a 64-bit integer.
    void writeValue(double val) {
        validator_.checkTypeExpected(AVRO_DOUBLE);
        union {
            double asDouble;
            int64_t asBits;
        } pun;
        pun.asDouble = val;
        buffer_.writeTo(pun.asBits);
    }

    /// Writes a string as its length followed by its characters.
    void writeValue(const std::string &val) {
        validator_.checkTypeExpected(AVRO_STRING);
        putBytes(val.c_str(), val.size());
    }

    /// Writes \p size bytes at \p val as a length followed by the bytes.
    void writeBytes(const void *val, size_t size) {
        validator_.checkTypeExpected(AVRO_BYTES);
        putBytes(val, size);
    }

    /// Writes the N bytes of a built-in array without a length prefix.
    template<size_t N>
    void writeFixed(const uint8_t (&val)[N]) {
        validator_.checkFixedSizeExpected(N);
        const char *raw = reinterpret_cast<const char *>(val);
        buffer_.writeTo(raw, N);
    }

    /// Writes the N bytes of a std::array without a length prefix.
    template<size_t N>
    void writeFixed(const std::array<uint8_t, N> &val) {
        validator_.checkFixedSizeExpected(val.size());
        const char *raw = reinterpret_cast<const char *>(val.data());
        buffer_.writeTo(raw, val.size());
    }

    /// Marks the start of a record; nothing is written to the buffer.
    void writeRecord() {
        validator_.checkTypeExpected(AVRO_RECORD);
        validator_.checkTypeExpected(AVRO_LONG);
        validator_.setCount(1);
    }

    /// Marks the end of a record; nothing is written to the buffer.
    void writeRecordEnd() {
        validator_.checkTypeExpected(AVRO_RECORD);
        validator_.checkTypeExpected(AVRO_LONG);
        validator_.setCount(0);
    }

    /// Starts an array block of \p size items.
    void writeArrayBlock(int64_t size) {
        validator_.checkTypeExpected(AVRO_ARRAY);
        writeCount(size);
    }

    /// Terminates an array with a block of size zero.
    void writeArrayEnd() {
        writeArrayBlock(0);
    }

    /// Starts a map block of \p size entries.
    void writeMapBlock(int64_t size) {
        validator_.checkTypeExpected(AVRO_MAP);
        writeCount(size);
    }

    /// Terminates a map with a block of size zero.
    void writeMapEnd() {
        writeMapBlock(0);
    }

    /// Writes the index of the selected union branch.
    void writeUnion(int64_t choice) {
        validator_.checkTypeExpected(AVRO_UNION);
        writeCount(choice);
    }

    /// Writes the index of the selected enum symbol.
    void writeEnum(int64_t choice) {
        validator_.checkTypeExpected(AVRO_ENUM);
        writeCount(choice);
    }

    /// Returns the data written so far as an InputBuffer.
    InputBuffer buffer() const {
        return InputBuffer(buffer_);
    }

private:
    /// Appends \p val to the buffer using encodeInt64().
    void putLong(int64_t val) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
        std::array<uint8_t, 10> encoded;
        const size_t used = encodeInt64(val, encoded);
        buffer_.writeTo(reinterpret_cast<const char *>(encoded.data()), used);
    }

    /// Appends the length \p size followed by \p size bytes from \p val.
    void putBytes(const void *val, size_t size) {
        putLong(static_cast<int64_t>(size));
        const char *raw = reinterpret_cast<const char *>(val);
        buffer_.writeTo(raw, size);
    }

    /// Validates, records and writes a count (block size or branch index).
    void writeCount(int64_t count) {
        validator_.checkTypeExpected(AVRO_LONG);
        validator_.setCount(count);
        putLong(count);
    }

    ValidatorType validator_;
    OutputBuffer buffer_;
};

/// Writer that performs no validation.
typedef WriterImpl<NullValidator> Writer;
/// Writer that validates every write with Validator.
typedef WriterImpl<Validator> ValidatingWriter;
} // namespace avro
#endif

View File

@ -1,53 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Encoding_hh__
#define avro_Encoding_hh__
#include <array>
#include <cstddef>
#include <cstdint>
#include "Config.hh"
/// \file
/// Functions for encoding and decoding integers with zigzag compression
namespace avro {
/// Zigzag-encodes a signed 64-bit value so that values of small magnitude,
/// positive or negative, map onto small unsigned values
/// (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
///
/// The left shift is carried out on the unsigned representation: shifting a
/// negative (or overflowing) signed value left is undefined behaviour before
/// C++20 and is rejected in constant expressions, whereas the unsigned shift
/// is always well defined and produces the same bits.
AVRO_DECL constexpr uint64_t encodeZigzag64(int64_t input) noexcept {
    // The right shift replicates the sign bit into every position.
    // cppcheck-suppress shiftTooManyBitsSigned
    return (static_cast<uint64_t>(input) << 1) ^ static_cast<uint64_t>(input >> 63);
}
/// Reverses encodeZigzag64(): the lowest bit of \p input selects the sign
/// and the remaining bits carry the magnitude.
AVRO_DECL constexpr int64_t decodeZigzag64(uint64_t input) noexcept {
    return static_cast<int64_t>(input >> 1) ^ -static_cast<int64_t>(input & 1);
}
/// Zigzag-encodes a signed 32-bit value so that values of small magnitude,
/// positive or negative, map onto small unsigned values
/// (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
///
/// The left shift is carried out on the unsigned representation: shifting a
/// negative (or overflowing) signed value left is undefined behaviour before
/// C++20 and is rejected in constant expressions, whereas the unsigned shift
/// is always well defined and produces the same bits.
AVRO_DECL constexpr uint32_t encodeZigzag32(int32_t input) noexcept {
    // The right shift replicates the sign bit into every position.
    // cppcheck-suppress shiftTooManyBitsSigned
    return (static_cast<uint32_t>(input) << 1) ^ static_cast<uint32_t>(input >> 31);
}
/// Reverses encodeZigzag32(): the lowest bit of \p input selects the sign
/// and the remaining bits carry the magnitude.
AVRO_DECL constexpr int32_t decodeZigzag32(uint32_t input) noexcept {
    return static_cast<int32_t>(input >> 1) ^ -static_cast<int32_t>(input & 1);
}
/// Encodes \p input into \p output (defined out of line).
/// \return A size_t that callers in Writer.hh use as the number of bytes of
/// \p output to emit.
AVRO_DECL size_t encodeInt32(int32_t input, std::array<uint8_t, 5> &output) noexcept;
/// Encodes \p input into \p output (defined out of line).
/// \return A size_t that callers in Writer.hh use as the number of bytes of
/// \p output to emit.
AVRO_DECL size_t encodeInt64(int64_t input, std::array<uint8_t, 10> &output) noexcept;
} // namespace avro
#endif

View File

@ -1,492 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Buffer_hh__
#define avro_Buffer_hh__
#ifndef _WIN32
#include <sys/uio.h>
#endif
#include <utility>
#include <vector>
#include "../Config.hh"
#include "detail/BufferDetail.hh"
#include "detail/BufferDetailIterator.hh"
/**
* \file Buffer.hh
*
* \brief Definitions for InputBuffer and OutputBuffer classes
*
**/
namespace avro {
class OutputBuffer;
class InputBuffer;

/**
 * The OutputBuffer (write-only buffer)
 *
 * Use cases for OutputBuffer
 *
 * - write message to buffer using ostream class or directly
 * - append messages to headers
 * - building up streams of messages via append
 * - converting to read-only buffers for sending
 * - extracting parts of the messages into read-only buffers
 *
 * -# ASIO access:
 * - write to a buffer(s) by asio using iterator
 * - convert to read buffer for deserializing
 *
 * OutputBuffer is assignable and copy-constructible. On copy or assignment,
 * only a pointer is copied, so the two resulting copies refer to the same
 * data and modifying one will modify both.
 **/
class AVRO_DECL OutputBuffer {
public:
    typedef detail::size_type size_type;
    typedef detail::data_type data_type;

    /**
     * The asio library expects a const_iterator (the const-ness refers to the
     * fact that the underlying set of buffers will not be modified, even
     * though the data in those buffers is being modified). The iterator
     * provides the list of addresses an operation can write to.
     **/
    typedef detail::OutputBufferIterator const_iterator;

    /**
     * Default constructor. Will pre-allocate at least the requested size, but
     * can grow larger on demand.
     *
     * Destructor uses the default, which resets a shared pointer, deleting the
     * underlying data if no other copies exist.
     *
     * Copy and assignment operators are not explicitly provided because the
     * default ones work fine. The default makes only a shallow copy, so the
     * copies will refer to the same memory. This is required by asio
     * functions, which will implicitly make copies for asynchronous
     * operations. Therefore, the user must be careful that if they create
     * multiple copies of the same OutputBuffer, only one is being modified,
     * otherwise undefined behavior may occur.
     *
     * \param reserveSize Amount of free space to reserve up front; nothing
     * is reserved when it is zero.
     **/
    explicit OutputBuffer(size_type reserveSize = 0) : pimpl_(new detail::BufferImpl) {
        if (reserveSize) {
            reserve(reserveSize);
        }
    }

    /**
     * Reserve enough space for a wroteTo() operation. When using writeTo(),
     * the buffer will grow dynamically as needed. But when using the iterator
     * to write (followed by wroteTo()), data may only be written to the space
     * available, so this ensures there is enough room in the buffer before
     * the write operation.
     **/
    void reserve(size_type reserveSize) {
        pimpl_->reserveFreeSpace(reserveSize);
    }

    /**
     * Write a block of data to the buffer. The buffer size will automatically
     * grow if the size is larger than what is currently free.
     *
     * \return The value reported by the underlying implementation.
     **/
    size_type writeTo(const data_type *data, size_type size) {
        return pimpl_->writeTo(data, size);
    }

    /**
     * Write a single value to the buffer. The buffer size will automatically
     * grow if there is not room for the value. The value must be a
     * "fundamental" type, e.g. int, float, etc. (otherwise use the other
     * writeTo overload).
     **/
    template<typename T>
    void writeTo(T val) {
        pimpl_->writeTo(val, std::is_fundamental<T>());
    }

    /**
     * Update the state of the buffer after writing through the iterator
     * interface. This function exists primarily for boost::asio, which
     * writes directly to the buffer using its iterator. In this case, the
     * internal state of the buffer does not reflect that the data was
     * written; this informs the buffer how much data was written.
     *
     * The buffer does not automatically resize in this case, the bytes written
     * cannot exceed the amount of free space. Attempting to write more will
     * throw a std::length_error exception.
     *
     * \return The value reported by the underlying implementation, or 0
     * when \p size is 0. The result is kept in size_type throughout; it is
     * not squeezed through an int, which would truncate large counts.
     **/
    size_type wroteTo(size_type size) {
        if (size == 0) {
            return 0;
        }
        if (size > freeSpace()) {
            throw std::length_error("Impossible to write more data than free space");
        }
        return pimpl_->wroteTo(size);
    }

    /**
     * Does the buffer have any data?
     **/
    bool empty() const {
        return (pimpl_->size() == 0);
    }

    /**
     * Returns the size of the buffer, in bytes.
     */
    size_type size() const {
        return pimpl_->size();
    }

    /**
     * Returns the current free space that is available to write to in the
     * buffer, in bytes. This is not a strict limit in size, as writeTo() can
     * automatically increase capacity if necessary.
     **/
    size_type freeSpace() const {
        return pimpl_->freeSpace();
    }

    /**
     * Appends the data in the argument to the end of this buffer. The
     * argument can be either an InputBuffer or OutputBuffer.
     **/
    template<class BufferType>
    void append(const BufferType &buf) {
        // don't append an empty buffer
        if (buf.size()) {
            pimpl_->append(*buf.pimpl_);
        }
    }

    /**
     * Return an iterator pointing to the first data chunk of this buffer
     * that may be written to.
     **/
    const_iterator begin() const {
        return const_iterator(pimpl_->beginWrite());
    }

    /**
     * Return the end iterator for writing.
     **/
    const_iterator end() const {
        return const_iterator(pimpl_->endWrite());
    }

    /**
     * Discard any data in this buffer.
     **/
    void discardData() {
        pimpl_->discardData();
    }

    /**
     * Discard the specified number of bytes from this data, starting at the
     * beginning. Throws std::out_of_range if \p bytes is greater than the
     * number of bytes held. Discarding zero bytes does nothing.
     **/
    void discardData(size_t bytes) {
        if (bytes > 0) {
            if (bytes < pimpl_->size()) {
                pimpl_->discardData(bytes);
            } else if (bytes == pimpl_->size()) {
                // Everything goes: use the cheaper discard-all path.
                pimpl_->discardData();
            } else {
                throw std::out_of_range("trying to discard more data than exists");
            }
        }
    }

    /**
     * Remove bytes from this buffer, starting from the beginning, and place
     * them into a new buffer. Throws if the number of requested bytes exceeds
     * the size of the buffer. Data and freeSpace in the buffer after bytes
     * remains in this buffer.
     **/
    InputBuffer extractData(size_type bytes);

    /**
     * Remove all bytes from this buffer, returning them in a new buffer.
     * After removing data, some freeSpace may remain in this buffer.
     **/
    InputBuffer extractData();

    /**
     * Clone this buffer, creating a copy that contains the same data.
     **/
    OutputBuffer clone() const {
        detail::BufferImpl::SharedPtr newImpl(new detail::BufferImpl(*pimpl_));
        return OutputBuffer(newImpl);
    }

    /**
     * Add unmanaged data to the buffer. The buffer will not automatically
     * free the data, but it will call the supplied function when the data is
     * no longer referenced by the buffer (or copies of the buffer).
     **/
    void appendForeignData(const data_type *data, size_type size, const detail::free_func &func) {
        pimpl_->appendForeignData(data, size, func);
    }

    /**
     * Returns the number of chunks that contain free space.
     **/
    int numChunks() const {
        return pimpl_->numFreeChunks();
    }

    /**
     * Returns the number of chunks that contain data.
     **/
    int numDataChunks() const {
        return pimpl_->numDataChunks();
    }

private:
    friend class InputBuffer;
    friend class BufferReader;

    /// Wraps an existing implementation object; used by clone().
    explicit OutputBuffer(detail::BufferImpl::SharedPtr pimpl) : pimpl_(std::move(pimpl)) {}

    detail::BufferImpl::SharedPtr pimpl_; ///< Must never be null.
};
/**
* The InputBuffer (read-only buffer)
*
* InputBuffer is an immutable buffer that may be constructed from an
* OutputBuffer, or by several of OutputBuffer's methods. Once the data is
* transferred to an InputBuffer it cannot be modified, only read (via
* BufferReader, istream, or its iterator).
*
* Assignments and copies are shallow copies.
*
* -# ASIO access: - iterate using const_iterator for sending messages
*
**/
class AVRO_DECL InputBuffer {
public:
typedef detail::size_type size_type;
typedef detail::data_type data_type;
// needed for asio
typedef detail::InputBufferIterator const_iterator;
/**
* Default InputBuffer creates an empty buffer.
*
* Copy/assignment functions use the default ones. They will do a shallow
* copy, and because InputBuffer is immutable, the copies will be
* identical.
*
* Destructor also uses the default, which resets a shared pointer,
* deleting the underlying data if no other copies of it exist.
**/
InputBuffer() : pimpl_(new detail::BufferImpl) {}
/**
* Construct an InputBuffer that contains the contents of an OutputBuffer.
* The two buffers will have the same contents, but this copy will be
* immutable, while the OutputBuffer may still be written to.
*
* A new BufferImpl is copy-constructed from the source's implementation,
* so this buffer does not share its implementation object with \p src.
*
* If you wish to move the data from the OutputBuffer to a new InputBuffer
* (leaving only free space in the OutputBuffer),
* OutputBuffer::extractData() will do this more efficiently.
*
* Implicit conversion is allowed.
**/
// NOLINTNEXTLINE(google-explicit-constructor)
InputBuffer(const OutputBuffer &src) : pimpl_(new detail::BufferImpl(*src.pimpl_)) {}
/**
* Does the buffer have any data?
*
* \return true when the buffer size is zero.
**/
bool empty() const {
return (pimpl_->size() == 0);
}
/**
* Returns the size of the buffer, in bytes.
**/
size_type size() const {
return pimpl_->size();
}
/**
* Return an iterator pointing to the first data chunk of this buffer
* that contains data.
**/
const_iterator begin() const {
return const_iterator(pimpl_->beginRead());
}
/**
* Return the end iterator.
**/
const_iterator end() const {
return const_iterator(pimpl_->endRead());
}
/**
* Returns the number of chunks containing data.
**/
int numChunks() const {
return pimpl_->numDataChunks();
}
private:
friend class OutputBuffer; // for append function
friend class istreambuf;
friend class BufferReader;
/// Wrap an existing implementation object without copying it; the new
/// InputBuffer shares \p pimpl with whoever else holds it.
explicit InputBuffer(const detail::BufferImpl::SharedPtr &pimpl) : pimpl_(pimpl) {}
/**
* Class to indicate that a copy of an OutputBuffer to InputBuffer should be
* a shallow copy, used to enable reading of the contents of an
* OutputBuffer without the need to convert it to InputBuffer using a deep
* copy. It is private and only used by BufferReader and istreambuf
* classes.
*
* Writing to an OutputBuffer while it is being read may lead to undefined
* behavior.
**/
class ShallowCopy {};
/**
* Make a shallow copy of an OutputBuffer in order to read it without
* causing conversion overhead. The implementation pointer is shared with
* \p src rather than duplicated.
**/
InputBuffer(const OutputBuffer &src, const ShallowCopy &) : pimpl_(src.pimpl_) {}
/**
* Make a shallow copy of an InputBuffer. The default copy constructor
* already provides shallow copy, this is just provided for generic
* algorithms that wish to treat InputBuffer and OutputBuffer in the same
* manner.
**/
InputBuffer(const InputBuffer &src, const ShallowCopy &) : pimpl_(src.pimpl_) {}
detail::BufferImpl::ConstSharedPtr pimpl_; ///< Must never be null.
};
/*
 * Implementations of some OutputBuffer functions are inlined here
 * because InputBuffer definition was required before.
 */

/**
 * Move all data out of this buffer into a freshly created implementation
 * object and return it wrapped in an InputBuffer. An empty buffer yields an
 * empty InputBuffer without touching the implementation.
 **/
inline InputBuffer OutputBuffer::extractData() {
    detail::BufferImpl::SharedPtr extracted(new detail::BufferImpl);
    const bool hasData = (pimpl_->size() != 0);
    if (hasData) {
        pimpl_->extractData(*extracted);
    }
    return InputBuffer(extracted);
}
/**
 * Move the first \p bytes bytes of data out of this buffer and return them
 * in a new InputBuffer.
 *
 * \param bytes Number of bytes to extract; zero yields an empty InputBuffer.
 * \throws std::out_of_range if \p bytes exceeds the amount of data held.
 **/
inline InputBuffer OutputBuffer::extractData(size_type bytes) {
    const size_type available = pimpl_->size();
    if (available < bytes) {
        throw std::out_of_range("trying to extract more data than exists");
    }
    detail::BufferImpl::SharedPtr extracted(new detail::BufferImpl);
    if (bytes == 0) {
        return InputBuffer(extracted);
    }
    if (bytes == available) {
        // everything is requested: use the whole-buffer overload
        pimpl_->extractData(*extracted);
    } else {
        pimpl_->extractData(*extracted, bytes);
    }
    return InputBuffer(extracted);
}
#ifndef _WIN32
/**
 * Fill \p iov with one iovec entry per chunk of \p buf, for use with
 * writev/readv. The entries point into the buffer's own storage, so the
 * caller must keep the buffer alive while the iovec vector is in use.
 *
 * For an InputBuffer the entries describe the existing data (for reading).
 *
 * For an OutputBuffer the entries describe free space that may be written
 * to. Call OutputBuffer::reserve() beforehand to make enough room (check
 * with OutputBuffer::freeSpace()), and afterwards the caller MUST call
 * OutputBuffer::wroteTo(), otherwise the buffer will not know the space is
 * no longer free.
 *
 * \param buf Buffer whose chunks are described; must provide numChunks(),
 *            begin(), const_iterator and data_type.
 * \param iov Output vector, resized to the number of chunks.
 **/
template<class BufferType>
inline void toIovec(BufferType &buf, std::vector<struct iovec> &iov) {
    typedef typename BufferType::data_type byte_type;
    const int chunkCount = buf.numChunks();
    iov.resize(chunkCount);
    typename BufferType::const_iterator chunk = buf.begin();
    for (int idx = 0; idx < chunkCount; ++idx, ++chunk) {
        struct iovec &entry = iov[idx];
        // iovec carries a non-const pointer even when only used for reading
        entry.iov_base = const_cast<byte_type *>(chunk->data());
        entry.iov_len = chunk->size();
    }
}
#endif
} // namespace avro
#endif

View File

@ -1,112 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_BufferPrint_hh__
#define avro_BufferPrint_hh__
#include "BufferReader.hh"
#include <cctype>
#include <iomanip>
#include <iostream>
/**
* \file BufferPrint.hh
*
* \brief Convenience functions for printing buffer contents
**/
namespace avro {
namespace detail {
/**
 * \fn hexPrint
 *
 * Prints a buffer to a stream in the canonical hex+ASCII format,
 * the same used by the program 'hexdump -C': an 8-digit hex offset, up to
 * sixteen bytes in hex split into two groups of eight, and the same bytes
 * as ASCII between '|' characters (non-printable bytes shown as '.').
 *
 * The reader is consumed from its current position until no bytes remain.
 * The stream's format flags and fill character are restored before
 * returning.
 *
 * \param os     Stream to print to.
 * \param reader Source of the bytes to dump.
 **/
inline void
hexPrint(std::ostream &os, BufferReader &reader) {
    // flags() does not include the fill character, so both must be saved;
    // otherwise the '0' fill set below would leak into the caller's stream.
    const std::ios_base::fmtflags savedFlags = os.flags();
    const char savedFill = os.fill();

    char sixteenBytes[16];
    // size_t rather than int: read() returns a size_t byte count
    size_t offset = 0;

    os << std::setfill('0');
    os << std::hex;
    while (reader.bytesRemaining()) {
        // offset column followed by two spaces, as in 'hexdump -C'
        os << std::setw(8) << offset << "  ";
        const size_t inBuffer = reader.read(sixteenBytes, sizeof(sixteenBytes));
        offset += inBuffer;

        for (size_t i = 0; i < sizeof(sixteenBytes); ++i) {
            if (i == 8) {
                // extra gap between the two groups of eight bytes
                os << ' ';
            }
            if (i < inBuffer) {
                os << std::setw(2);
                os << (static_cast<int>(sixteenBytes[i]) & 0xff) << ' ';
            } else {
                // pad a missing byte with the same width as a printed one
                // ("xx ") so the ASCII column stays aligned on a short line
                os << "   ";
            }
        }
        os << " |";
        for (size_t i = 0; i < inBuffer; ++i) {
            os.put(isprint(sixteenBytes[i] & 0xff) ? sixteenBytes[i] : '.');
        }
        os << "|\n";
    }

    // restore formatting state
    os.flags(savedFlags);
    os.fill(savedFill);
}
} // namespace detail
} // namespace avro
/// Stream an OutputBuffer as a hex+ASCII dump: a BufferReader is created
/// over the buffer and handed to avro::detail::hexPrint. Returns \p os.
inline std::ostream &operator<<(std::ostream &os, const avro::OutputBuffer &buffer) {
avro::BufferReader reader(buffer);
avro::detail::hexPrint(os, reader);
return os;
}
/// Stream an InputBuffer as a hex+ASCII dump: a BufferReader is created
/// over the buffer and handed to avro::detail::hexPrint. Returns \p os.
inline std::ostream &operator<<(std::ostream &os, const avro::InputBuffer &buffer) {
avro::BufferReader reader(buffer);
avro::detail::hexPrint(os, reader);
return os;
}
#endif

View File

@ -1,273 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_BufferReader_hh__
#define avro_BufferReader_hh__
#include "Buffer.hh"
#include <type_traits>
#ifdef min
#undef min
#endif
/**
* \file BufferReader.hh
*
* \brief Helper class for reading bytes from buffer in a streaming manner,
* without the overhead of istreams.
*
**/
namespace avro {
/**
 * Helper class for reading bytes from buffer without worrying about
 * chunk boundaries. May read from an InputBuffer or OutputBuffer.
 *
 * The reader shares ownership of the buffer implementation, records the
 * buffer size at construction, and tracks a cursor consisting of a chunk
 * iterator plus a byte offset inside that chunk.
 **/
class AVRO_DECL BufferReader : private boost::noncopyable {
public:
    typedef detail::data_type data_type;
    typedef detail::size_type size_type;

private:
    /// Number of unread bytes left in the chunk under the cursor.
    size_type chunkRemaining() const {
        return iter_->dataSize() - chunkPos_;
    }

    /// Advance the cursor by \p howMuch bytes inside the current chunk and
    /// move on to the next chunk once the current one is fully consumed.
    /// Every caller passes at most chunkRemaining().
    void incrementChunk(size_type howMuch) {
        bytesRemaining_ -= howMuch;
        chunkPos_ += howMuch;
        if (chunkPos_ == iter_->dataSize()) {
            chunkPos_ = 0;
            ++iter_;
        }
    }

    /// Reset the cursor to the first byte of the buffer.
    void rewind() {
        iter_ = bufferImpl_->beginRead();
        bytesRemaining_ = bytes_;
        chunkPos_ = 0;
    }

    /// Address of the byte under the cursor.
    const data_type *addr() const {
        return iter_->tellReadPos() + chunkPos_;
    }

public:
    /// Create a reader positioned at the start of an InputBuffer.
    explicit BufferReader(const InputBuffer &buf) : bufferImpl_(buf.pimpl_),
                                                    iter_(bufferImpl_->beginRead()),
                                                    bytes_(bufferImpl_->size()),
                                                    bytesRemaining_(bytes_),
                                                    chunkPos_(0) {}

    /// Create a reader positioned at the start of an OutputBuffer. The
    /// implementation object is shared with the buffer, not copied.
    explicit BufferReader(const OutputBuffer &buf) : bufferImpl_(buf.pimpl_),
                                                     iter_(bufferImpl_->beginRead()),
                                                     bytes_(bufferImpl_->size()),
                                                     bytesRemaining_(bytes_),
                                                     chunkPos_(0) {}

    /**
     * How many bytes are still not read from this buffer.
     **/
    size_type bytesRemaining() const {
        return bytesRemaining_;
    }

    /**
     * How many bytes have been consumed so far, i.e. the current read
     * position measured from the start of the buffer.
     **/
    size_type bytesRead() const {
        return bytes_ - bytesRemaining_;
    }

    /**
     * Read a block of data from the buffer.
     *
     * \param data Destination; must have room for \p size bytes.
     * \param size Number of bytes requested; clamped to bytesRemaining().
     * \return The number of bytes actually copied.
     **/
    size_type read(data_type *data, size_type size) {
        if (size > bytesRemaining_) {
            size = bytesRemaining_;
        }
        size_type sizeToRead = size;
        while (sizeToRead) {
            const size_type toRead = std::min(sizeToRead, chunkRemaining());
            memcpy(data, addr(), toRead);
            sizeToRead -= toRead;
            data += toRead;
            incrementChunk(toRead);
        }
        return size;
    }

    /**
     * Read exactly \p size bytes from the buffer into a string.
     *
     * \return false (leaving \p str and the cursor untouched) if fewer than
     *         \p size bytes remain, true otherwise.
     **/
    bool read(std::string &str, size_type size) {
        if (size > bytesRemaining_) {
            return false;
        }
        if (size <= chunkRemaining()) {
            fastStringRead(str, size);
        } else {
            slowStringRead(str, size);
        }
        return true;
    }

    /**
     * Read a single value from the buffer. The value must be a "fundamental"
     * type, e.g. int, float, etc. (otherwise use one of the other read
     * overloads); other types fail to compile.
     *
     * \return false if fewer than sizeof(T) bytes remain.
     **/
    template<typename T>
    bool read(T &val) {
        return read(val, std::is_fundamental<T>());
    }

    /**
     * Skips a block of data from the buffer.
     *
     * \return false (without moving) if fewer than \p bytes bytes remain.
     **/
    bool skip(size_type bytes) {
        bool skipped = false;
        if (bytes <= bytesRemaining_) {
            doSkip(bytes);
            skipped = true;
        }
        return skipped;
    }

    /**
     * Seek to an absolute position in the buffer.
     *
     * \return false if \p pos lies beyond the size recorded at construction.
     **/
    bool seek(size_type pos) {
        if (pos > bytes_) {
            return false;
        }
        size_type toSkip = pos;
        size_type curPos = bytesRead();
        // if the seek position is ahead, we can use skip to get there
        if (pos >= curPos) {
            toSkip -= curPos;
        }
        // if the seek position is ahead of the start of the chunk we can back up to
        // start of the chunk
        else if (pos >= (curPos - chunkPos_)) {
            curPos -= chunkPos_;
            bytesRemaining_ += chunkPos_;
            chunkPos_ = 0;
            toSkip -= curPos;
        } else {
            // target is in an earlier chunk: restart from the beginning
            rewind();
        }
        doSkip(toSkip);
        return true;
    }

    /// Copy the next byte into \p val without consuming it; returns false
    /// (leaving \p val untouched) when no bytes remain.
    bool peek(char &val) {
        bool ret = (bytesRemaining_ > 0);
        if (ret) {
            val = *(addr());
        }
        return ret;
    }

    /// Copy the next \p bytes bytes into a new InputBuffer and advance past
    /// them. If more bytes are requested than remain, nothing is copied and
    /// an empty buffer is returned.
    InputBuffer copyData(size_type bytes) {
        if (bytes > bytesRemaining_) {
            // force no copy
            bytes = 0;
        }
        detail::BufferImpl::SharedPtr newImpl(new detail::BufferImpl);
        if (bytes) {
            bufferImpl_->copyData(*newImpl, iter_, chunkPos_, bytes);
            doSkip(bytes);
        }
        return InputBuffer(newImpl);
    }

private:
    /// Advance the cursor by \p sizeToSkip bytes, crossing chunk boundaries
    /// as needed. Callers guarantee that many bytes remain.
    void doSkip(size_type sizeToSkip) {
        while (sizeToSkip) {
            const size_type toSkip = std::min(sizeToSkip, chunkRemaining());
            sizeToSkip -= toSkip;
            incrementChunk(toSkip);
        }
    }

    /// Implementation of read(T&) for fundamental types.
    template<typename T>
    bool read(T &val, const std::true_type &) {
        if (sizeof(T) > bytesRemaining_) {
            return false;
        }
        if (sizeof(T) <= chunkRemaining()) {
            // The cursor can sit at any byte offset, so the data is not
            // guaranteed to be aligned for T; copy the bytes instead of
            // dereferencing a reinterpret_cast pointer.
            memcpy(&val, addr(), sizeof(T));
            incrementChunk(sizeof(T));
        } else {
            // value straddles a chunk boundary: use the chunk-aware byte read
            read(reinterpret_cast<data_type *>(&val), sizeof(T));
        }
        return true;
    }

    /// An uninstantiable function, selected when the std::is_fundamental
    /// check fails; instantiating it triggers a compile-time error.
    template<typename T>
    bool read(T &val, const std::false_type &) {
        static_assert(sizeof(T) == 0, "Not a valid type to read");
        return false;
    }

    /// String read when all requested bytes live in the current chunk.
    void fastStringRead(std::string &str, size_type sizeToCopy) {
        str.assign(addr(), sizeToCopy);
        incrementChunk(sizeToCopy);
    }

    /// String read that gathers the bytes from several chunks.
    void slowStringRead(std::string &str, size_type sizeToCopy) {
        str.clear();
        str.reserve(sizeToCopy);
        while (sizeToCopy) {
            const size_type toCopy = std::min(sizeToCopy, chunkRemaining());
            str.append(addr(), toCopy);
            sizeToCopy -= toCopy;
            incrementChunk(toCopy);
        }
    }

    detail::BufferImpl::ConstSharedPtr bufferImpl_;      ///< Shared buffer implementation being read
    detail::BufferImpl::ChunkList::const_iterator iter_; ///< Chunk under the cursor
    size_type bytes_;                                    ///< Buffer size captured at construction
    size_type bytesRemaining_;                           ///< Bytes not yet consumed
    size_type chunkPos_;                                 ///< Offset of the cursor inside *iter_
};
} // namespace avro
#endif

View File

@ -1,89 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_BufferStream_hh__
#define avro_BufferStream_hh__
#include "BufferStreambuf.hh"
/**
* \file BufferStream.hh
*
* \brief Custom istream and ostream classes for use with buffers
**/
namespace avro {
/**
*
* \brief Custom ostream class for writing to an OutputBuffer
*
* The stream owns an ostreambuf member and registers it with the
* std::ostream base class, so everything written to the stream goes through
* that streambuf.
*
**/
class AVRO_DECL ostream : public std::ostream {
public:
/// Default constructor, creates a new OutputBuffer.
// NOTE(review): the base class receives &obuf_ before obuf_ itself is
// constructed; this presumably relies on std::ostream only storing the
// pointer during construction.
ostream() : std::ostream(&obuf_) {}
/// Output to a specific buffer. The buffer is handed to the ostreambuf
/// member, which keeps its own OutputBuffer copy of it.
explicit ostream(OutputBuffer &buf) : std::ostream(&obuf_),
obuf_(buf) {}
/// Return the output buffer created by the write operations to this ostream.
const OutputBuffer &getBuffer() const {
return obuf_.getBuffer();
}
protected:
ostreambuf obuf_; ///< streambuf that stores the written bytes
};
/**
* \brief Custom istream class for reading from an InputBuffer.
*
* If the buffer contains binary data, then it is recommended to only use the
* read() and readsome() functions--get() or getline() may be confused if the
* binary data happens to contain an EOF character.
*
* For buffers containing text, the full implementation of istream is safe.
*
* The stream owns an istreambuf member and registers it with the
* std::istream base class.
*
**/
class AVRO_DECL istream : public std::istream {
public:
/// Constructor, requires an InputBuffer to read from.
explicit istream(const InputBuffer &buf) : std::istream(&ibuf_), ibuf_(buf) {}
/// Constructor, takes an OutputBuffer to read from (by making a shallow copy to an InputBuffer).
/// Writing to the OutputBuffer while an istream is using it may lead to undefined behavior.
explicit istream(const OutputBuffer &buf) : std::istream(&ibuf_), ibuf_(buf) {}
/// Return the InputBuffer this stream is reading from.
const InputBuffer &getBuffer() const {
return ibuf_.getBuffer();
}
protected:
istreambuf ibuf_; ///< streambuf that serves the buffer's bytes
};
} // namespace avro
#endif

View File

@ -1,238 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_BufferStreambuf_hh__
#define avro_BufferStreambuf_hh__
#include <utility>
#include "Buffer.hh"
/** \file BufferStreambuf.hh
\brief streambuf implementation for istream and ostream.
*/
#ifdef min
#undef min
#endif
namespace avro {
/**
 * \brief Implementation of streambuf for use by the Buffer's ostream.
 *
 * This class derives from std::streambuf and implements the virtual functions
 * needed to operate on OutputBuffer. The override functions are overflow and
 * xsputn. Typically custom streambufs will also override sync for output,
 * but we have no need since all writes are immediately stored in the buffer.
 **/
class AVRO_DECL ostreambuf : public std::streambuf {
public:
    /// Default constructor creates a new OutputBuffer.
    ostreambuf() : std::streambuf(),
                   buffer_() {}

    /// Construct using an existing OutputBuffer; the member is
    /// copy-constructed from \p buffer.
    explicit ostreambuf(OutputBuffer &buffer) : std::streambuf(),
                                                buffer_(buffer) {}

    /// Return the buffer.
    const OutputBuffer &getBuffer() const {
        return buffer_;
    }

protected:
    /// Write a single character to the stream.
    ///
    /// \return \p c on success. When \p c is eof there is nothing to store;
    ///         a non-eof value is returned to signal success, as required by
    ///         the std::streambuf::overflow contract.
    int_type overflow(int_type c) override {
        if (traits_type::eq_int_type(c, traits_type::eof())) {
            // do not append the eof marker to the buffer as if it were data
            return traits_type::not_eof(c);
        }
        buffer_.writeTo(static_cast<OutputBuffer::data_type>(c));
        return c;
    }

    /// Write a block of characters to the stream.
    ///
    /// \return The number of characters written; 0 for a non-positive \p n.
    std::streamsize xsputn(const char_type *s, std::streamsize n) override {
        if (n <= 0) {
            // a negative count must not be turned into a huge size_t
            return 0;
        }
        return static_cast<std::streamsize>(buffer_.writeTo(s, static_cast<size_t>(n)));
    }

private:
    OutputBuffer buffer_; ///< Destination of everything written to the stream
};
/**
 * \brief Implementation of streambuf for use by the Buffer's istream.
 *
 * This class derives from std::streambuf and implements the virtual functions
 * needed to operate on InputBuffer. The override functions are underflow,
 * xsgetn, seekoff, seekpos and showmanyc. This is considered a buffered
 * streambuf, because it can access a chunk of the InputBuffer at a time,
 * using the iterator interface. Because the input is already buffered, uflow
 * is not required. pbackfail is not yet implemented but can be if necessary
 * (the inherited behavior is to fail, and has yet to be a problem).
 *
 **/
class AVRO_DECL istreambuf : public std::streambuf {
public:
    /// Construct from an InputBuffer to read from.
    explicit istreambuf(InputBuffer buffer) : std::streambuf(),
                                              buffer_(std::move(buffer)),
                                              basePos_(0),
                                              iter_(buffer_.begin()) {
        setBuffer();
    }

    /// Construct from an OutputBuffer by making a shallow InputBuffer copy.
    explicit istreambuf(const OutputBuffer &buffer) : std::streambuf(),
                                                      buffer_(buffer, InputBuffer::ShallowCopy()),
                                                      basePos_(0),
                                                      iter_(buffer_.begin()) {
        setBuffer();
    }

    /// Return the buffer.
    const InputBuffer &getBuffer() const {
        return buffer_;
    }

protected:
    /// The current chunk of data is exhausted, read the next chunk.
    /// Returns the first character of the new chunk, or eof at the end.
    int_type underflow() override {
        if (iter_ != buffer_.end()) {
            // account for the chunk being left before moving on
            basePos_ += (egptr() - eback());
            ++iter_;
        }
        return setBuffer();
    }

    /// Get a block of data from the stream. Overrides default behavior
    /// to ignore eof characters that may reside in the stream.
    /// Returns the number of bytes copied, which is less than \p len only
    /// when the end of the buffer is reached.
    std::streamsize xsgetn(char_type *c, std::streamsize len) override {
        std::streamsize bytesCopied = 0;

        while (bytesCopied < len) {
            size_t inBuffer = egptr() - gptr();

            if (inBuffer) {
                auto remaining = static_cast<size_t>(len - bytesCopied);
                size_t toCopy = std::min(inBuffer, remaining);
                memcpy(c, gptr(), toCopy);
                c += toCopy;
                bytesCopied += toCopy;
                // gbump takes an int; make the narrowing explicit
                gbump(static_cast<int>(toCopy));
            }

            if (bytesCopied < len) {
                underflow();
                if (iter_ == buffer_.end()) {
                    break;
                }
            }
        }

        return bytesCopied;
    }

    /// Special seek override to navigate InputBuffer chunks.
    /// Returns the new absolute position, or pos_type(off_type(-1)) when the
    /// target lies before the start or past the end of the buffer.
    pos_type seekoff(off_type off, std::ios::seekdir dir, std::ios_base::openmode) override {
        off_type curpos = basePos_ + (gptr() - eback());
        off_type newpos = off;

        if (dir == std::ios::cur) {
            newpos += curpos;
        } else if (dir == std::ios::end) {
            newpos += buffer_.size();
        }
        // short circuit for tell()
        if (newpos == curpos) {
            return curpos;
        }

        // A position before the start of the buffer is never valid; without
        // this check the get pointer would be moved in front of eback().
        if (newpos < 0) {
            return pos_type(off_type(-1));
        }

        off_type endpos = basePos_ + (egptr() - eback());

        // if the position is after our current buffer make
        // sure it's not past the end of the buffer
        if ((newpos > endpos) && (newpos > static_cast<off_type>(buffer_.size()))) {
            return pos_type(off_type(-1));
        }
        // if the new position is before our current iterator
        // reset the iterator to the beginning
        else if (newpos < basePos_) {
            iter_ = buffer_.begin();
            basePos_ = 0;
            setBuffer();
            endpos = (egptr() - eback());
        }

        // now if the new position is after the end of the buffer
        // increase the buffer until it is not
        while (newpos > endpos) {
            istreambuf::underflow();
            endpos = basePos_ + (egptr() - eback());
        }

        setg(eback(), eback() + (newpos - basePos_), egptr());
        return newpos;
    }

    /// Calls seekoff for implementation.
    pos_type seekpos(pos_type pos, std::ios_base::openmode) override {
        return istreambuf::seekoff(pos, std::ios::beg, std::ios_base::openmode(0));
    }

    /// Shows the number of bytes buffered in the current chunk, or next chunk if
    /// current is exhausted.
    std::streamsize showmanyc() override {
        // this function only gets called when the current buffer has been
        // completely read, verify this is the case, and if so, underflow to
        // fetch the next buffer
        if (egptr() - gptr() == 0) {
            istreambuf::underflow();
        }
        return egptr() - gptr();
    }

private:
    /// Setup the streambuf buffer pointers after updating
    /// the value of the iterator. Returns the first character
    /// in the new buffer, or eof if there is no buffer.
    int_type setBuffer() {
        int_type ret = traits_type::eof();

        if (iter_ != buffer_.end()) {
            // streambuf's get area uses non-const pointers even though it is
            // only read from here
            char *loc = const_cast<char *>(iter_->data());
            setg(loc, loc, loc + iter_->size());
            ret = std::char_traits<char>::to_int_type(*gptr());
        } else {
            setg(nullptr, nullptr, nullptr);
        }

        return ret;
    }

    const InputBuffer buffer_;         ///< Buffer being read
    off_type basePos_;                 ///< Absolute offset of the current chunk's first byte
    InputBuffer::const_iterator iter_; ///< Chunk currently exposed as the get area
};
} // namespace avro
#endif

View File

@ -1,515 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_BufferDetail_hh__
#define avro_BufferDetail_hh__
#include <boost/function.hpp>
#include <boost/shared_array.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/static_assert.hpp>
#include <boost/utility.hpp>
#include <utility>
#ifdef HAVE_BOOST_ASIO
#include <boost/asio/buffer.hpp>
#endif
#include <cassert>
#include <deque>
#include <exception>
/**
* \file BufferDetail.hh
*
* \brief The implementation details for the Buffer class.
*
**/
namespace avro {
namespace detail {
typedef char data_type;
typedef size_t size_type;
#ifdef HAVE_BOOST_ASIO
typedef boost::asio::const_buffer ConstAsioBuffer;
typedef boost::asio::mutable_buffer MutableAsioBuffer;
#endif
/// The size in bytes for blocks backing buffer chunks.
const size_type kMinBlockSize = 4096;
const size_type kMaxBlockSize = 16384;
const size_type kDefaultBlockSize = kMinBlockSize;
typedef boost::function<void(void)> free_func;
/**
* Simple class to hold a functor that executes on delete
**/
class CallOnDestroy {
public:
explicit CallOnDestroy(free_func func) : func_(std::move(func)) {}
~CallOnDestroy() {
if (func_) {
func_();
}
}
private:
free_func func_;
};
/**
 * \brief A chunk is the building block for buffers.
 *
 * A chunk refers to a memory block and tracks three positions in it: the
 * first readable byte, the end of written data (which is also the start of
 * free space), and the end of the usable area. Several chunks may share one
 * underlying block; the block is held through a shared array so it is
 * reference counted.
 *
 * Copying a chunk shares the underlying block but duplicates the three
 * positions, so each copy can be adjusted independently. This lets different
 * buffers refer to different (or even overlapping) parts of the same block.
 *
 **/
class Chunk {
public:
    /// Allocating constructor: creates a new underlying block of \p size
    /// bytes, all of which is initially free space.
    explicit Chunk(size_type size)
        : underlyingBlock_(new data_type[size]),
          readPos_(underlyingBlock_.get()),
          writePos_(readPos_),
          endPos_(readPos_ + size) {}

    /// Foreign buffer constructor: refers to caller-supplied data, for
    /// reading only. The whole range counts as written data (no free space),
    /// and \p func is wrapped in a reference-counted CallOnDestroy.
    Chunk(const data_type *data, size_type size, const free_func &func)
        : callOnDestroy_(new CallOnDestroy(func)),
          readPos_(const_cast<data_type *>(data)),
          writePos_(readPos_ + size),
          endPos_(writePos_) {}

    /// Drop \p howMuch readable bytes from the front by advancing the read
    /// position.
    void truncateFront(size_type howMuch) {
        readPos_ += howMuch;
        assert(readPos_ <= writePos_);
    }

    /// Drop \p howMuch readable bytes from the back by pulling the write
    /// position backwards.
    void truncateBack(size_type howMuch) {
        writePos_ -= howMuch;
        assert(readPos_ <= writePos_);
    }

    /// Position the next byte may be written to.
    data_type *tellWritePos() const {
        return writePos_;
    }

    /// Position of the first byte containing valid data.
    const data_type *tellReadPos() const {
        return readPos_;
    }

    /// Record that \p howMuch bytes were written by advancing the write
    /// position.
    void incrementCursor(size_type howMuch) {
        writePos_ += howMuch;
        assert(writePos_ <= endPos_);
    }

    /// Number of bytes of data held by this chunk.
    size_type dataSize() const {
        return static_cast<size_type>(writePos_ - readPos_);
    }

    /// Number of bytes still available for writing.
    size_type freeSize() const {
        return static_cast<size_type>(endPos_ - writePos_);
    }

    /// Total number of bytes this chunk spans (used and free).
    size_type capacity() const {
        return static_cast<size_type>(endPos_ - readPos_);
    }

private:
    friend bool operator==(const Chunk &lhs, const Chunk &rhs);
    friend bool operator!=(const Chunk &lhs, const Chunk &rhs);

    // reference counted object will call a functor when it's destroyed
    boost::shared_ptr<CallOnDestroy> callOnDestroy_;
    // more than one buffer can share an underlying block, so use a shared array
    boost::shared_array<data_type> underlyingBlock_;

    data_type *readPos_;  ///< The first readable byte in the block
    data_type *writePos_; ///< The end of written data and start of free space
    data_type *endPos_;   ///< Marks the end of the usable block area
};
/**
 * Two chunks are equal when they refer to the same underlying block;
 * the read/write positions are not compared.
 **/
inline bool operator==(const Chunk &lhs, const Chunk &rhs) {
    return lhs.underlyingBlock_.get() == rhs.underlyingBlock_.get();
}
/**
 * Two chunks are unequal when they refer to different underlying blocks;
 * the read/write positions are not compared.
 **/
inline bool operator!=(const Chunk &lhs, const Chunk &rhs) {
    return lhs.underlyingBlock_.get() != rhs.underlyingBlock_.get();
}
/**
* \brief Implementation details for Buffer class
*
* Internally, BufferImpl keeps two lists of chunks, one list consists entirely of
* chunks containing data, and one list which contains chunks with free space.
*
*
*/
class BufferImpl : boost::noncopyable {
/// Append a newly allocated chunk of exactly \p size bytes (default: the
/// default block size) to the write list and count its free space.
void allocChunkChecked(size_type size = kDefaultBlockSize) {
    const Chunk fresh(size);
    writeChunks_.push_back(fresh);
    freeSpace_ += fresh.freeSize();
}
/// Append a new chunk whose size is the requested size clamped to the
/// range [kMinBlockSize, kMaxBlockSize].
void allocChunk(size_type size) {
    const size_type clamped = (size < kMinBlockSize)   ? kMinBlockSize
                              : (size > kMaxBlockSize) ? kMaxBlockSize
                                                       : size;
    allocChunkChecked(clamped);
}
/// Update the state of the chunks after a write operation. This function
/// ensures the chunk states are consistent with the write.
///
/// \param size Number of bytes that were just written at the write position
///             of writeChunks_.front(); must not exceed freeSpace_.
void postWrite(size_type size) {
// precondition to this function is that the writeChunks_.front()
// contains the data that was just written, so make sure writeChunks_
// is not empty:
assert(size <= freeSpace_ && !writeChunks_.empty());
// This is probably the one tricky part of BufferImpl. The data that
// was written now exists in writeChunks_.front(). Now we must make
// sure that same data exists in readChunks_.back().
//
// There are two cases:
//
// 1. readChunks_.back() and writeChunks_.front() refer to the same
// underlying block, in which case they both just need their cursor
// updated to reflect the new state.
//
// 2. readChunks_.back() is not the same block as writeChunks_.front(),
// in which case it should be, since the writeChunks_.front() contains
// the next bit of data that will be appended to readChunks_, and
// therefore needs to be copied there so we can proceed with updating
// their state.
//
// if readChunks_.back() is not the same as writeChunks_.front(), make a
// copy of it there (also when readChunks_ is still empty)
if (readChunks_.empty() || (readChunks_.back() != writeChunks_.front())) {
const Chunk &curChunk = writeChunks_.front();
readChunks_.push_back(curChunk);
// Any data that existed in the write chunk previously doesn't
// belong to this buffer (otherwise it would have already been
// added to the readChunks_ list). Here, adjust the start of the
// read chunk to begin after any data already existing in curChunk
readChunks_.back().truncateFront(curChunk.dataSize());
}
// both views of the block must agree on where the free space starts
assert(readChunks_.back().freeSize() == writeChunks_.front().freeSize());
// update the states of both readChunks_ and writeChunks_ to indicate that they are
// holding the new data
readChunks_.back().incrementCursor(size);
writeChunks_.front().incrementCursor(size);
size_ += size;
freeSpace_ -= size;
// if there is no more free space in writeChunks_.front(), the next write
// cannot use it, so dispose of it now
if (writeChunks_.front().freeSize() == 0) {
writeChunks_.pop_front();
}
}
public:
typedef std::deque<Chunk> ChunkList;
typedef boost::shared_ptr<BufferImpl> SharedPtr;
typedef boost::shared_ptr<const BufferImpl> ConstSharedPtr;
/// Default constructor, creates a buffer without any chunks: both the
/// free-space and data-size counters start at zero.
BufferImpl() : freeSpace_(0),
size_(0) {}
/// Copy constructor, gets a copy of all the chunks with data.
/// Only readChunks_ and size_ are taken from \p src; the write chunks are
/// not copied, so the new buffer starts with zero free space.
BufferImpl(const BufferImpl &src) : readChunks_(src.readChunks_),
freeSpace_(0),
size_(src.size_) {}
/// Amount of data held in this buffer, in bytes (the running total kept in
/// size_).
size_type size() const {
return size_;
}
/// Capacity, in bytes, that may be written before the buffer must allocate
/// more memory (the running total kept in freeSpace_).
size_type freeSpace() const {
return freeSpace_;
}
/// Allocate chunks until at least \p reserveSize bytes of free space are
/// available. The resulting free space may exceed the request because each
/// allocation is rounded to the allowed block sizes.
void reserveFreeSpace(size_type reserveSize) {
    while (reserveSize > freeSpace_) {
        const size_type shortfall = reserveSize - freeSpace_;
        allocChunk(shortfall);
    }
}
/// Return the begin iterator of the list of data (read) chunks.
ChunkList::const_iterator beginRead() const {
return readChunks_.begin();
}
/// Return the end iterator of the list of data (read) chunks.
ChunkList::const_iterator endRead() const {
return readChunks_.end();
}
/// Return the begin iterator of the list of free-space (write) chunks.
ChunkList::const_iterator beginWrite() const {
return writeChunks_.begin();
}
/// Return the end iterator of the list of free-space (write) chunks.
ChunkList::const_iterator endWrite() const {
return writeChunks_.end();
}
/// Write a single value to buffer, add a new chunk if necessary.
///
/// \param val Value whose sizeof(T) bytes are appended to the buffer.
template<typename T>
void writeTo(T val, const std::true_type &) {
    if (freeSpace_ && (sizeof(T) <= writeChunks_.front().freeSize())) {
        // fast path, there's enough room in the writeable chunk to just
        // straight out copy it. The write position can sit at any byte
        // offset, so it is not guaranteed to be aligned for T: copy the
        // bytes instead of storing through a reinterpret_cast pointer.
        memcpy(writeChunks_.front().tellWritePos(), &val, sizeof(T));
        postWrite(sizeof(T));
    } else {
        // need to fixup chunks first, so use the regular memcpy
        // writeTo method
        writeTo(reinterpret_cast<data_type *>(&val), sizeof(T));
    }
}
/// Deliberately uninstantiable overload, selected when the type-trait check
/// performed by the caller yields std::false_type. Instantiating it trips a
/// compile-time assertion, because sizeof(T) can never be zero.
template<typename T>
void writeTo(T, const std::false_type &) {
    BOOST_STATIC_ASSERT(sizeof(T) == 0);
}
/// Copy `size` bytes from `data` into the buffer, allocating chunks whenever
/// the free space runs out. The data may end up split across several chunks.
///
/// \return always `size`.
size_type writeTo(const data_type *data, size_type size) {
    const data_type *src = data;
    for (size_type remaining = size; remaining != 0;) {
        if (freeSpace_ == 0) {
            allocChunkChecked();
        }
        Chunk &target = writeChunks_.front();
        const size_type n = std::min<size_type>(target.freeSize(), remaining);
        assert(n);
        memcpy(target.tellWritePos(), src, n);
        // postWrite may retire `target` once it is full; it is not used afterwards.
        postWrite(n);
        src += n;
        remaining -= n;
    }
    return size;
}
/// Record that `size` bytes were written directly into the writable chunks
/// (for example through an iterator), advancing the bookkeeping chunk by chunk.
/// `size` must not exceed the current free space.
///
/// \return always `size`.
size_type wroteTo(size_type size) {
    assert(size <= freeSpace_);
    for (size_type pending = size; pending != 0;) {
        const size_type step = std::min<size_type>(writeChunks_.front().freeSize(), pending);
        assert(step);
        postWrite(step);
        pending -= step;
    }
    return size;
}
/// Append the chunks that have data in src to this buffer
void append(const BufferImpl &src) {
std::copy(src.readChunks_.begin(), src.readChunks_.end(), std::back_inserter(readChunks_));
size_ += src.size_;
}
/// Drop every chunk that contains data, leaving the buffer with size zero.
/// Writable chunks and free space are left untouched.
void discardData() {
    size_ = 0;
    readChunks_.clear();
}
/// Drop `bytes` bytes of data starting at the front of the buffer.
/// Whole chunks are popped; a partially consumed chunk is truncated in place.
/// `bytes` must be non-zero and no larger than size().
void discardData(size_type bytes) {
    assert(bytes && bytes <= size_);
    size_type remaining = bytes;
    while (remaining != 0) {
        const size_t held = readChunks_.front().dataSize();
        if (held > remaining) {
            // Only part of the front chunk goes away.
            readChunks_.front().truncateFront(remaining);
            break;
        }
        // The entire front chunk is discarded.
        readChunks_.pop_front();
        remaining -= held;
    }
    size_ -= bytes;
}
/// Move the first `bytes` bytes of data out of this buffer and append them to
/// dest's data chunks. `bytes` must be non-zero and no larger than size().
void extractData(BufferImpl &dest, size_type bytes) {
    assert(bytes && bytes <= size_);
    size_type remaining = bytes;
    while (remaining != 0) {
        const size_t held = readChunks_.front().dataSize();
        // dest always receives a copy of the front chunk first...
        dest.readChunks_.push_back(readChunks_.front());
        if (held > remaining) {
            // ...and when only part of it is wanted, the source keeps the
            // tail while dest's copy is trimmed down to the head.
            readChunks_.front().truncateFront(remaining);
            dest.readChunks_.back().truncateBack(held - remaining);
            break;
        }
        // The whole chunk was extracted.
        readChunks_.pop_front();
        remaining -= held;
    }
    size_ -= bytes;
    dest.size_ += bytes;
}
/// Hand all data chunks over to `dest`, leaving this buffer without data.
/// `dest` is expected to hold no data chunks beforehand (asserted).
void extractData(BufferImpl &dest) {
    assert(dest.readChunks_.empty());
    readChunks_.swap(dest.readChunks_);
    dest.size_ = size_;
    size_ = 0;
}
/// Copy `bytes` bytes of data into `dest` by copying chunks, without
/// modifying the source. Similar to extractData, but non-destructive.
///
/// \param dest   buffer whose data chunks receive the copies.
/// \param iter   chunk in which the copied range starts; it is advanced
///               without any end check, so the caller must guarantee that
///               `bytes` bytes are available from `iter` + `offset`.
/// \param offset byte offset of the range within the first chunk.
/// \param bytes  number of bytes to copy.
static void copyData(BufferImpl &dest,
                     ChunkList::const_iterator iter,
                     size_type offset,
                     size_type bytes) {
    // Nothing to copy. Without this guard the final truncation below would
    // call back() on a possibly empty deque, which is undefined behaviour.
    // NOTE(review): assumes truncateBack(0) is a no-op, so skipping it for a
    // non-empty dest changes nothing - confirm against Chunk.
    if (bytes == 0) {
        return;
    }
    size_type copied = 0;
    while (copied < bytes) {
        dest.readChunks_.push_back(*iter);
        // The offset only applies to the first chunk; all subsequent chunks
        // are copied from their start.
        dest.readChunks_.back().truncateFront(offset);
        offset = 0;
        copied += dest.readChunks_.back().dataSize();
        ++iter;
    }
    // If the last chunk copied has more bytes than needed, trim the surplus.
    size_type excess = copied - bytes;
    dest.readChunks_.back().truncateBack(excess);
    dest.size_ += bytes;
}
/// The number of chunks containing data. Used for debugging.
int numDataChunks() const {
return readChunks_.size();
}
/// The number of chunks containing free space (note that an entire chunk
/// may not be free). Used for debugging.
int numFreeChunks() const {
return writeChunks_.size();
}
/// Add unmanaged data to the buffer. The buffer will not automatically
/// free the data, but it will call the supplied function when the data is
/// no longer referenced by the buffer (or copies of the buffer).
void appendForeignData(const data_type *data, size_type size, const free_func &func) {
readChunks_.push_back(Chunk(data, size, func));
size_ += size;
}
BufferImpl &operator=(const BufferImpl &src) = delete;
private:
ChunkList readChunks_; ///< chunks of this buffer containing data
ChunkList writeChunks_; ///< chunks of this buffer containing free space
size_type freeSpace_; ///< capacity of buffer before allocation required
size_type size_; ///< amount of data in buffer
};
} // namespace detail
} // namespace avro
#endif

View File

@ -1,211 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_BufferDetailIterator_hh__
#define avro_BufferDetailIterator_hh__
#include "BufferDetail.hh"
/**
* \file BufferDetailIterator.hh
*
* \brief The implementation details for the Buffer iterators.
**/
namespace avro {
namespace detail {
/**
 * \brief Exposes the readable region of a chunk, convertible to an asio const buffer.
 *
 * Iterators over an InputBuffer walk the list of chunks, so they hold a chunk
 * list iterator internally. boost::asio functions, however, need something
 * convertible to an asio buffer; this wrapper supplies that conversion when
 * HAVE_BOOST_ASIO is defined.
 **/
struct InputIteratorHelper {
    /// Construct a helper with an unassigned (value-initialized) iterator.
    InputIteratorHelper() : iter_() {}

    /// Construct a helper positioned at the given chunk.
    explicit InputIteratorHelper(const BufferImpl::ChunkList::const_iterator &iter) : iter_(iter) {}

    /// Start of the valid data in the current chunk.
    const data_type *data() const { return iter_->tellReadPos(); }

    /// Number of valid data bytes in the current chunk.
    size_type size() const { return iter_->dataSize(); }

#ifdef HAVE_BOOST_ASIO
    /// Conversion to an asio buffer. No validity check is made: the only
    /// position without a chunk is end(), which must never be dereferenced.
    operator ConstAsioBuffer() const {
        return ConstAsioBuffer(iter_->tellReadPos(), iter_->dataSize());
    }
#endif

    BufferImpl::ChunkList::const_iterator iter_; ///< the current iterator
};
/**
 * \brief Exposes the writable region of a chunk, convertible to a mutable asio buffer.
 *
 * Iterators over an OutputBuffer walk the list of chunks, so they hold a chunk
 * list iterator internally. boost::asio functions, however, need something
 * convertible to an asio buffer; this wrapper supplies that conversion when
 * HAVE_BOOST_ASIO is defined.
 */
struct OutputIteratorHelper {
    /// Construct a helper with an unassigned (value-initialized) iterator.
    OutputIteratorHelper() : iter_() {}

    /// Construct a helper positioned at the given chunk.
    explicit OutputIteratorHelper(const BufferImpl::ChunkList::const_iterator &iter) : iter_(iter) {}

    /// Address of the first writable byte in the current chunk.
    data_type *data() const { return iter_->tellWritePos(); }

    /// Number of bytes that can still be written into the current chunk.
    size_type size() const { return iter_->freeSize(); }

#ifdef HAVE_BOOST_ASIO
    /// Conversion to an asio buffer. No validity check is made: the only
    /// position without a chunk is end(), which must never be dereferenced.
    operator MutableAsioBuffer() const {
        return MutableAsioBuffer(iter_->tellWritePos(), iter_->freeSize());
    }
#endif

    BufferImpl::ChunkList::const_iterator iter_; ///< the current iterator
};
/**
 * \brief Forward iterator over the chunks of a buffer.
 *
 * Dereferencing yields the Helper (InputIteratorHelper or
 * OutputIteratorHelper) wrapped around the underlying chunk list iterator.
 **/
template<typename Helper>
class BufferIterator {
public:
    using this_type = BufferIterator<Helper>;

    /**
     * @name Typedefs
     *
     * The declarations STL iterators are expected to provide. The boost::asio
     * documentation asks for a bidirectional iterator, whereas only a forward
     * iterator is implemented here. This has not caused trouble with asio so
     * far, but a future asio version might require more.
     **/
    //@{
    using iterator_category = std::forward_iterator_tag; // this is a lie to appease asio
    using value_type = Helper;
    using difference_type = std::ptrdiff_t;
    using pointer = value_type *;
    using reference = value_type &;
    //@}

    /// Construct an uninitialized iterator.
    BufferIterator() : helper_() {}

    // Copy construction and assignment use the compiler-generated defaults.

    /// Construct an iterator at the given position in the buffer's chunk list.
    explicit BufferIterator(BufferImpl::ChunkList::const_iterator iter) : helper_(iter) {}

    /// Dereference: the helper wrapping the current chunk.
    reference operator*() { return helper_; }

    /// Dereference (const): the helper wrapping the current chunk.
    const value_type &operator*() const { return helper_; }

    /// Member access on the helper wrapping the current chunk.
    pointer operator->() { return &helper_; }

    /// Member access (const) on the helper wrapping the current chunk.
    const value_type *operator->() const { return &helper_; }

    /// Pre-increment: advance to the next chunk, or to end().
    this_type &operator++() {
        ++helper_.iter_;
        return *this;
    }

    /// Post-increment: advance to the next chunk, returning the old position.
    this_type operator++(int) {
        this_type previous(*this);
        ++helper_.iter_;
        return previous;
    }

    /// True if both iterators point to the same chunk.
    bool operator==(const this_type &rhs) const { return helper_.iter_ == rhs.helper_.iter_; }

    /// True if the iterators point to different chunks.
    bool operator!=(const this_type &rhs) const { return helper_.iter_ != rhs.helper_.iter_; }

private:
    Helper helper_;
};
typedef BufferIterator<InputIteratorHelper> InputBufferIterator;
typedef BufferIterator<OutputIteratorHelper> OutputBufferIterator;
} // namespace detail
} // namespace avro
#endif

View File

@ -1,135 +0,0 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e # exit on error
# Print the list of accepted targets and abort with a non-zero status.
usage() {
  echo "Usage: $0 {lint|test|dist|clean|install|doc|format}"
  exit 1
}
# At least one target is required.
if [ $# -eq 0 ]
then
  usage
fi

# Prefer a VERSION.txt next to this script; otherwise use the shared one.
if [ -f VERSION.txt ]
then
  VERSION=$(cat VERSION.txt)
else
  VERSION=$(cat ../../share/VERSION.txt)
fi

# Locations used by do_doc and do_dist. The earlier
# DIST_DIR=../../dist/$AVRO_CPP assignment was dead (overwritten before any
# use), so only the effective value is kept.
BUILD=../../build
AVRO_CPP=avro-cpp-$VERSION
AVRO_DOC=avro-doc-$VERSION
BUILD_DIR=../../build
BUILD_CPP=$BUILD/$AVRO_CPP
DOC_CPP=$BUILD/$AVRO_DOC/api/cpp
DIST_DIR=../../dist/cpp
# Relative to $BUILD_DIR, because tar is invoked from inside that directory.
TARFILE=../dist/cpp/$AVRO_CPP.tar.gz
# Run doxygen and copy the generated doc/ tree to $DOC_CPP.
# Exits with status 1 when doxygen did not produce a doc directory.
function do_doc() {
  doxygen
  if [ ! -d doc ]
  then
    exit 1
  fi
  mkdir -p $DOC_CPP
  cp -R doc/* $DOC_CPP
}
# Assemble the source distribution: stage the sources under $BUILD_CPP,
# strip .svn directories, add VERSION.txt and create the tarball.
# Exits with status 1 when the tarball is missing afterwards.
function do_dist() {
  rm -rf $BUILD_CPP/
  mkdir -p $BUILD_CPP
  cp -r api AUTHORS build.sh CMakeLists.txt ChangeLog \
    LICENSE NOTICE impl jsonschemas NEWS parser README test examples \
    $BUILD_CPP
  find $BUILD_CPP -name '.svn' | xargs rm -rf
  cp ../../share/VERSION.txt $BUILD_CPP
  mkdir -p $DIST_DIR
  # $TARFILE is relative to $BUILD_DIR, hence the cd in a subshell.
  (cd $BUILD_DIR; tar cvzf $TARFILE $AVRO_CPP && cp $TARFILE $AVRO_CPP )
  # The original tested $DIST_FILE, which is never defined, so the check
  # could never fail. Test the tarball that was actually written.
  if [ ! -f "$BUILD_DIR/$TARFILE" ]
  then
    echo "dist: expected tarball $BUILD_DIR/$TARFILE was not created" >&2
    exit 1
  fi
}
# Always (re)configure an out-of-tree build directory first; runs in a
# subshell so the working directory of the script is unchanged.
(mkdir -p build; cd build; cmake --version; cmake -G "Unix Makefiles" ..)
# Process every target given on the command line, in order. Because of
# `set -e`, the first failing target aborts the script.
for target in "$@"
do
case "$target" in
lint)
# some versions of cppcheck seem to require an explicit
# "--error-exitcode" option to return non-zero code
cppcheck --error-exitcode=1 --inline-suppr -f -q -x c++ api examples impl test
;;
test)
# Debug build, then run each test binary; the && chain stops at the first failure.
(cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Debug -D AVRO_ADD_PROTECTOR_FLAGS=1 .. && make && cd .. \
&& ./build/buffertest \
&& ./build/unittest \
&& ./build/CodecTests \
&& ./build/CompilerTests \
&& ./build/StreamTests \
&& ./build/SpecificTests \
&& ./build/AvrogencppTests \
&& ./build/DataFileTests \
&& ./build/SchemaTests)
;;
xcode-test)
# Separate build tree for the Xcode generator; tests run through ctest.
mkdir -p build.xcode
(cd build.xcode \
&& cmake -G Xcode .. \
&& xcodebuild -configuration Release \
&& ctest -C Release)
;;
dist)
# Configure a Release build, then package sources and documentation.
(cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release ..)
do_dist
do_doc
;;
doc)
do_doc
;;
format)
# Reformat every file under api, impl and test in place, using the style file.
clang-format -i --style file `find api -type f` `find impl -type f` `find test -type f`
;;
clean)
# Clean build outputs and remove generated docs and test data files.
(cd build && make clean)
rm -rf doc test.avro test?.df test??.df test_skip.df test_lastSync.df test_readRecordUsingLastSync.df
;;
install)
(cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release .. && make install)
;;
*)
# Unknown target: print usage and exit non-zero.
usage
esac
done
exit 0

View File

@ -1,48 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CPX_HH_1278398428__H_
#define CPX_HH_1278398428__H_
#include "avro/Specific.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
namespace c {
/// Plain complex-number record with two double fields, matching the
/// field order used by the codec below (re, then im).
struct cpx {
    double re; ///< real part
    double im; ///< imaginary part
};
} // namespace c
namespace avro {
/// Codec for c::cpx: the two fields are written and read in the order re, im.
template<>
struct codec_traits<c::cpx> {
    /// Encode `v` as its real part followed by its imaginary part.
    static void encode(Encoder &e, const c::cpx &v) {
        const double &realPart = v.re;
        const double &imagPart = v.im;
        avro::encode(e, realPart);
        avro::encode(e, imagPart);
    }

    /// Decode into `v`, reading the real part first and the imaginary part second.
    static void decode(Decoder &d, c::cpx &v) {
        double &realPart = v.re;
        double &imagPart = v.im;
        avro::decode(d, realPart);
        avro::decode(d, imagPart);
    }
};
} // namespace avro
#endif

View File

@ -1,8 +0,0 @@
{
"type": "record",
"name": "cpx",
"fields" : [
{"name": "re", "type": "double"},
{"name": "im", "type" : "double"}
]
}

View File

@ -1,59 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <complex>
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
#include "avro/Specific.hh"
namespace avro {
/// Codec for std::complex<T>: the real part is written first, then the imaginary part.
template<typename T>
struct codec_traits<std::complex<T>> {
    /// Encode `c` as real part followed by imaginary part.
    static void encode(Encoder &e, const std::complex<T> &c) {
        avro::encode(e, c.real());
        avro::encode(e, c.imag());
    }

    /// Decode both parts in the same order and assemble them into `c`.
    static void decode(Decoder &d, std::complex<T> &c) {
        T realPart, imagPart;
        avro::decode(d, realPart);
        avro::decode(d, imagPart);
        c = std::complex<T>(realPart, imagPart);
    }
};
} // namespace avro
int
main()
{
std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();
avro::EncoderPtr e = avro::binaryEncoder();
e->init(*out);
std::complex<double> c1(1.0, 2.0);
avro::encode(*e, c1);
std::unique_ptr<avro::InputStream> in = avro::memoryInputStream(*out);
avro::DecoderPtr d = avro::binaryDecoder();
d->init(*in);
std::complex<double> c2;
avro::decode(*d, c2);
std::cout << '(' << std::real(c2) << ", " << std::imag(c2) << ')' << std::endl;
return 0;
}

View File

@ -1,62 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include "cpx.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
#include "avro/ValidSchema.hh"
#include "avro/Compiler.hh"
#include "avro/DataFile.hh"
/// Read the JSON schema stored in `filename` and compile it.
/// \return the compiled schema.
avro::ValidSchema loadSchema(const char *filename) {
    avro::ValidSchema compiled;
    std::ifstream schemaFile(filename);
    avro::compileJsonSchema(schemaFile, compiled);
    return compiled;
}
int
main()
{
avro::ValidSchema cpxSchema = loadSchema("cpx.json");
{
avro::DataFileWriter<c::cpx> dfw("test.bin", cpxSchema);
c::cpx c1;
for (int i = 0; i < 100; i++) {
c1.re = i * 100;
c1.im = i + 100;
dfw.write(c1);
}
dfw.close();
}
{
avro::DataFileReader<c::cpx> dfr("test.bin", cpxSchema);
c::cpx c2;
while (dfr.read(c2)) {
std::cout << '(' << c2.re << ", " << c2.im << ')' << std::endl;
}
}
return 0;
}

View File

@ -1,44 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cpx.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
int
main()
{
std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();
avro::EncoderPtr e = avro::binaryEncoder();
e->init(*out);
c::cpx c1;
c1.re = 1.0;
c1.im = 2.13;
avro::encode(*e, c1);
std::unique_ptr<avro::InputStream> in = avro::memoryInputStream(*out);
avro::DecoderPtr d = avro::binaryDecoder();
d->init(*in);
c::cpx c2;
avro::decode(*d, c2);
std::cout << '(' << c2.re << ", " << c2.im << ')' << std::endl;
return 0;
}

View File

@ -1,68 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include <complex>
#include "cpx.hh"
#include "avro/Compiler.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
#include "avro/Specific.hh"
#include "avro/Generic.hh"
int
main()
{
std::ifstream ifs("cpx.json");
avro::ValidSchema cpxSchema;
avro::compileJsonSchema(ifs, cpxSchema);
std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();
avro::EncoderPtr e = avro::binaryEncoder();
e->init(*out);
c::cpx c1;
c1.re = 100.23;
c1.im = 105.77;
avro::encode(*e, c1);
std::unique_ptr<avro::InputStream> in = avro::memoryInputStream(*out);
avro::DecoderPtr d = avro::binaryDecoder();
d->init(*in);
avro::GenericDatum datum(cpxSchema);
avro::decode(*d, datum);
std::cout << "Type: " << datum.type() << std::endl;
if (datum.type() == avro::AVRO_RECORD) {
const avro::GenericRecord& r = datum.value<avro::GenericRecord>();
std::cout << "Field-count: " << r.fieldCount() << std::endl;
if (r.fieldCount() == 2) {
const avro::GenericDatum& f0 = r.fieldAt(0);
if (f0.type() == avro::AVRO_DOUBLE) {
std::cout << "Real: " << f0.value<double>() << std::endl;
}
const avro::GenericDatum& f1 = r.fieldAt(1);
if (f1.type() == avro::AVRO_DOUBLE) {
std::cout << "Imaginary: " << f1.value<double>() << std::endl;
}
}
}
return 0;
}

View File

@ -1,46 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef IMAGINARY_HH_3460301992__H_
#define IMAGINARY_HH_3460301992__H_
#include "boost/any.hpp"
#include "avro/Specific.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
namespace i {
/// Reduced view of the complex record that carries only the imaginary part.
struct cpx {
    double im; ///< imaginary part
};
} // namespace i
namespace avro {
/// Codec for i::cpx, which consists of the single double field `im`.
template<>
struct codec_traits<i::cpx> {
    /// Encode the imaginary part of `v`.
    static void encode(Encoder &e, const i::cpx &v) {
        const double &imagPart = v.im;
        avro::encode(e, imagPart);
    }

    /// Decode the imaginary part into `v`.
    static void decode(Decoder &d, i::cpx &v) {
        double &imagPart = v.im;
        avro::decode(d, imagPart);
    }
};
} // namespace avro
#endif

View File

@ -1,7 +0,0 @@
{
"type": "record",
"name": "cpx",
"fields" : [
{"name": "im", "type" : "double"}
]
}

View File

@ -1,63 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include "cpx.hh"
#include "imaginary.hh"
#include "avro/Compiler.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
#include "avro/Specific.hh"
#include "avro/Generic.hh"
/// Read the JSON schema stored in `filename` and compile it.
/// \return the compiled schema.
avro::ValidSchema load(const char *filename) {
    avro::ValidSchema compiled;
    std::ifstream schemaFile(filename);
    avro::compileJsonSchema(schemaFile, compiled);
    return compiled;
}
int
main()
{
avro::ValidSchema cpxSchema = load("cpx.json");
avro::ValidSchema imaginarySchema = load("imaginary.json");
std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();
avro::EncoderPtr e = avro::binaryEncoder();
e->init(*out);
c::cpx c1;
c1.re = 100.23;
c1.im = 105.77;
avro::encode(*e, c1);
std::unique_ptr<avro::InputStream> in = avro::memoryInputStream(*out);
avro::DecoderPtr d = avro::resolvingDecoder(cpxSchema, imaginarySchema,
avro::binaryDecoder());
d->init(*in);
i::cpx c2;
avro::decode(*d, c2);
std::cout << "Imaginary: " << c2.im << std::endl;
}

View File

@ -1,32 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include "avro/ValidSchema.hh"
#include "avro/Compiler.hh"
/// Minimal example: compile the JSON schema stored in cpx.json.
int main() {
    avro::ValidSchema cpxSchema;
    std::ifstream schemaFile("cpx.json");
    avro::compileJsonSchema(schemaFile, cpxSchema);
}

View File

@ -1,68 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include <complex>
#include "avro/Compiler.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
#include "avro/Specific.hh"
namespace avro {
/// Codec for std::complex<T>: the real part is written first, then the imaginary part.
template<typename T>
struct codec_traits<std::complex<T>> {
    /// Encode `c` as real part followed by imaginary part.
    static void encode(Encoder &e, const std::complex<T> &c) {
        avro::encode(e, c.real());
        avro::encode(e, c.imag());
    }

    /// Decode both parts in the same order and assemble them into `c`.
    static void decode(Decoder &d, std::complex<T> &c) {
        T realPart, imagPart;
        avro::decode(d, realPart);
        avro::decode(d, imagPart);
        c = std::complex<T>(realPart, imagPart);
    }
};
} // namespace avro
int
main()
{
std::ifstream ifs("cpx.json");
avro::ValidSchema cpxSchema;
avro::compileJsonSchema(ifs, cpxSchema);
std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();
avro::EncoderPtr e = avro::validatingEncoder(cpxSchema,
avro::binaryEncoder());
e->init(*out);
std::complex<double> c1(1.0, 2.0);
avro::encode(*e, c1);
std::unique_ptr<avro::InputStream> in = avro::memoryInputStream(*out);
avro::DecoderPtr d = avro::validatingDecoder(cpxSchema,
avro::binaryDecoder());
d->init(*in);
std::complex<double> c2;
avro::decode(*d, c2);
std::cout << '(' << std::real(c2) << ", " << std::imag(c2) << ')' << std::endl;
return 0;
}

View File

@ -1,222 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Decoder.hh"
#include "Exception.hh"
#include "Zigzag.hh"
#include <memory>
namespace avro {
using std::make_shared;
/// Decoder implementation for the Avro binary encoding. All members are
/// private (class default access); instances are obtained through
/// binaryDecoder() and used via the Decoder interface.
class BinaryDecoder : public Decoder {
// Source of the encoded bytes; re-targeted by init().
StreamReader in_;
// --- Decoder interface ---
void init(InputStream &is) final;
void decodeNull() final;
bool decodeBool() final;
int32_t decodeInt() final;
int64_t decodeLong() final;
float decodeFloat() final;
double decodeDouble() final;
void decodeString(std::string &value) final;
void skipString() final;
void decodeBytes(std::vector<uint8_t> &value) final;
void skipBytes() final;
void decodeFixed(size_t n, std::vector<uint8_t> &value) final;
void skipFixed(size_t n) final;
size_t decodeEnum() final;
size_t arrayStart() final;
size_t arrayNext() final;
size_t skipArray() final;
size_t mapStart() final;
size_t mapNext() final;
size_t skipMap() final;
size_t decodeUnionIndex() final;
// --- Internal helpers ---
// Reads one zigzag-encoded variable-length integer.
int64_t doDecodeLong();
// Reads a block item count; a negative count is followed by a byte size.
size_t doDecodeItemCount();
// Reads a non-negative length prefix (throws on a negative value).
size_t doDecodeLength();
void drain() final;
};
/// Factory: create a new binary decoder behind the generic DecoderPtr handle.
DecoderPtr binaryDecoder() {
    DecoderPtr decoder = std::make_shared<BinaryDecoder>();
    return decoder;
}
/// Point the decoder at `is`; subsequent decode calls read from that stream.
void BinaryDecoder::init(InputStream &is) { in_.reset(is); }
/// Decode a null: intentionally reads nothing from the stream.
void BinaryDecoder::decodeNull() {}
/// Decode a boolean from a single byte: 0 is false, 1 is true.
/// \throws Exception for any other byte value.
bool BinaryDecoder::decodeBool() {
    auto v = in_.read();
    if (v == 0) {
        return false;
    } else if (v == 1) {
        return true;
    }
    // Format the offending byte as a number. Streaming a char-sized value
    // would insert a raw (often unprintable) character into the message.
    // NOTE(review): presumes read() yields a byte-sized integer, as its use
    // in doDecodeLong suggests; the cast is harmless for wider types too.
    throw Exception(boost::format("Invalid value for bool: %1%") % static_cast<int>(v));
}
/// Decode an Avro int: read a variable-length long and narrow it to 32 bits.
/// \throws Exception when the decoded value does not fit in int32_t.
int32_t BinaryDecoder::decodeInt() {
    const int64_t wide = doDecodeLong();
    const bool fits = (wide >= INT32_MIN) && (wide <= INT32_MAX);
    if (!fits) {
        throw Exception(
            boost::format("Value out of range for Avro int: %1%") % wide);
    }
    return static_cast<int32_t>(wide);
}
/// Decode an Avro long (zigzag-encoded variable-length integer).
int64_t BinaryDecoder::decodeLong() {
    const int64_t value = doDecodeLong();
    return value;
}
/// Decode a float by copying sizeof(float) raw bytes from the stream into
/// the result. No byte swapping is performed here.
float BinaryDecoder::decodeFloat() {
    float value;
    uint8_t *const raw = reinterpret_cast<uint8_t *>(&value);
    in_.readBytes(raw, sizeof(float));
    return value;
}
/// Decode a double by copying sizeof(double) raw bytes from the stream into
/// the result. No byte swapping is performed here.
double BinaryDecoder::decodeDouble() {
    double value;
    uint8_t *const raw = reinterpret_cast<uint8_t *>(&value);
    in_.readBytes(raw, sizeof(double));
    return value;
}
size_t BinaryDecoder::doDecodeLength() {
ssize_t len = decodeInt();
if (len < 0) {
throw Exception(
boost::format("Cannot have negative length: %1%") % len);
}
return len;
}
/// Forward to the stream reader's drain(), passing `false`.
void BinaryDecoder::drain() { in_.drain(false); }
/// Decode a length-prefixed string into `value`, replacing its contents.
/// \throws Exception when the length prefix is negative.
void BinaryDecoder::decodeString(std::string &value) {
    size_t len = doDecodeLength();
    value.resize(len);
    if (len > 0) {
        // Write through the string's mutable storage (&value[0]); modifying
        // the array returned by c_str() via const_cast is undefined behaviour.
        in_.readBytes(reinterpret_cast<uint8_t *>(&value[0]), len);
    }
}
/// Skip a length-prefixed string without materializing it.
void BinaryDecoder::skipString() {
    const size_t byteCount = doDecodeLength();
    in_.skipBytes(byteCount);
}
/// Decode a length-prefixed byte sequence into `value`, replacing its contents.
void BinaryDecoder::decodeBytes(std::vector<uint8_t> &value) {
    const size_t byteCount = doDecodeLength();
    value.resize(byteCount);
    if (byteCount == 0) {
        return; // nothing to read for an empty value
    }
    in_.readBytes(value.data(), byteCount);
}
/// Skip a length-prefixed byte sequence without materializing it.
void BinaryDecoder::skipBytes() {
    const size_t byteCount = doDecodeLength();
    in_.skipBytes(byteCount);
}
/// Decode a fixed value of exactly `n` bytes into `value` (no length prefix
/// is read; the size comes from the caller).
void BinaryDecoder::decodeFixed(size_t n, std::vector<uint8_t> &value) {
    value.resize(n);
    if (n == 0) {
        return; // nothing to read for a zero-sized fixed
    }
    in_.readBytes(value.data(), n);
}
/// Skip a fixed value of `n` bytes.
void BinaryDecoder::skipFixed(size_t n) { in_.skipBytes(n); }
/// Decode an enum ordinal: a variable-length long converted to size_t.
/// No range check is applied here.
size_t BinaryDecoder::decodeEnum() {
    const int64_t ordinal = doDecodeLong();
    return static_cast<size_t>(ordinal);
}
/// Begin decoding an array: returns the item count of its first block.
size_t BinaryDecoder::arrayStart() {
    const size_t itemCount = doDecodeItemCount();
    return itemCount;
}
/// Decode the item count of an array/map block. A negative count means the
/// count is followed by another long, which is read and discarded; the
/// absolute value of the count is returned.
size_t BinaryDecoder::doDecodeItemCount() {
    const int64_t count = doDecodeLong();
    if (count >= 0) {
        return static_cast<size_t>(count);
    }
    doDecodeLong(); // trailing long after a negative count; value unused
    return static_cast<size_t>(-count);
}
// Returns the item count of the next array block.
//
// Uses doDecodeItemCount() so that a negative block count (which is followed
// by a block-size long) is handled the same way as in arrayStart() and
// mapNext(). Casting the raw long directly would turn a negative count into a
// huge size_t and leave the block-size long unread in the stream.
size_t BinaryDecoder::arrayNext() {
    return doDecodeItemCount();
}
// Skips over array blocks that carry a byte size (negative count) and returns
// the first non-negative count encountered.
size_t BinaryDecoder::skipArray() {
    while (true) {
        auto count = doDecodeLong();
        if (count >= 0) {
            return static_cast<size_t>(count);
        }
        // Negative count: the next long is the number of bytes to skip.
        auto byteLen = static_cast<size_t>(doDecodeLong());
        in_.skipBytes(byteLen);
    }
}
// Returns the item count of the first map block.
size_t BinaryDecoder::mapStart() {
    return doDecodeItemCount();
}
// Returns the item count of the next map block.
size_t BinaryDecoder::mapNext() {
    return doDecodeItemCount();
}
// Maps are skipped with the same block-walking logic as arrays.
size_t BinaryDecoder::skipMap() {
    return skipArray();
}
// Decodes a union branch index as a long and returns it as size_t
// (unchecked cast).
size_t BinaryDecoder::decodeUnionIndex() {
    const auto branch = doDecodeLong();
    return static_cast<size_t>(branch);
}
// Decodes a variable-length integer: 7 payload bits per byte, least
// significant group first, high bit set on every byte except the last.
// The accumulated value is then passed through decodeZigzag64().
// Throws Exception when a continuation byte would push the shift to 64 or more.
int64_t BinaryDecoder::doDecodeLong() {
    uint64_t accum = 0;
    for (int bitPos = 0;; bitPos += 7) {
        if (bitPos >= 64) {
            throw Exception("Invalid Avro varint");
        }
        uint8_t byte = in_.read();
        accum |= static_cast<uint64_t>(byte & 0x7f) << bitPos;
        if (!(byte & 0x80)) {
            break;
        }
    }
    return decodeZigzag64(accum);
}
} // namespace avro

View File

@ -1,147 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Encoder.hh"
#include "Zigzag.hh"
#include <array>
namespace avro {
using std::make_shared;
// Encoder implementation that writes values through a StreamWriter.
// All members are declared before any access specifier, so they are private;
// the overrides are marked `final`.
class BinaryEncoder : public Encoder {
// Writer that all encode* methods emit through.
StreamWriter out_;
// Stream management.
void init(OutputStream &os) final;
void flush() final;
int64_t byteCount() const final;
// Primitive values.
void encodeNull() final;
void encodeBool(bool b) final;
void encodeInt(int32_t i) final;
void encodeLong(int64_t l) final;
void encodeFloat(float f) final;
void encodeDouble(double d) final;
void encodeString(const std::string &s) final;
void encodeBytes(const uint8_t *bytes, size_t len) final;
void encodeFixed(const uint8_t *bytes, size_t len) final;
void encodeEnum(size_t e) final;
// Array / map framing.
void arrayStart() final;
void arrayEnd() final;
void mapStart() final;
void mapEnd() final;
void setItemCount(size_t count) final;
void startItem() final;
void encodeUnionIndex(size_t e) final;
// Shared helper used by every integer-like encode method.
void doEncodeLong(int64_t l);
};
// Factory: returns a freshly constructed BinaryEncoder.
EncoderPtr binaryEncoder() {
    return make_shared<BinaryEncoder>();
}
// Re-targets the internal writer at the given output stream.
void BinaryEncoder::init(OutputStream &os) {
    out_.reset(os);
}
// Flushes the internal writer.
void BinaryEncoder::flush() {
    out_.flush();
}
// Encoding a null writes nothing.
void BinaryEncoder::encodeNull() {
    // intentionally empty
}
// Writes a single byte: 1 for true, 0 for false.
void BinaryEncoder::encodeBool(bool b) {
    if (b) {
        out_.write(1);
    } else {
        out_.write(0);
    }
}
// 32-bit values share the long encoding path.
void BinaryEncoder::encodeInt(int32_t i) {
    doEncodeLong(i);
}
// Writes a 64-bit value via doEncodeLong().
void BinaryEncoder::encodeLong(int64_t l) {
    doEncodeLong(l);
}
// Writes the in-memory bytes of the float as-is; no byte-order conversion
// is performed here.
void BinaryEncoder::encodeFloat(float f) {
    out_.writeBytes(reinterpret_cast<const uint8_t *>(&f), sizeof(float));
}
// Writes the in-memory bytes of the double as-is; no byte-order conversion
// is performed here.
void BinaryEncoder::encodeDouble(double d) {
    out_.writeBytes(reinterpret_cast<const uint8_t *>(&d), sizeof(double));
}
// Writes the string's byte length followed by its bytes.
void BinaryEncoder::encodeString(const std::string &s) {
    const size_t n = s.size();
    const auto *payload = reinterpret_cast<const uint8_t *>(s.c_str());
    doEncodeLong(n);
    out_.writeBytes(payload, n);
}
// Writes `len` as a length prefix followed by `len` bytes from `bytes`.
void BinaryEncoder::encodeBytes(const uint8_t *bytes, size_t len) {
    doEncodeLong(len);
    out_.writeBytes(bytes, len);
}
// Writes exactly `len` bytes with no length prefix.
void BinaryEncoder::encodeFixed(const uint8_t *bytes, size_t len) {
    out_.writeBytes(bytes, len);
}
// Writes the enum ordinal via doEncodeLong().
void BinaryEncoder::encodeEnum(size_t e) {
    doEncodeLong(e);
}
// Starting an array writes nothing; counts are emitted by setItemCount().
void BinaryEncoder::arrayStart() {
    // intentionally empty
}
// Terminates the array by writing a zero count.
void BinaryEncoder::arrayEnd() {
    doEncodeLong(0);
}
// Starting a map writes nothing; counts are emitted by setItemCount().
void BinaryEncoder::mapStart() {
    // intentionally empty
}
// Terminates the map by writing a zero count.
void BinaryEncoder::mapEnd() {
    doEncodeLong(0);
}
// Writes the number of items in the upcoming block.
// Throws Exception when `count` is zero (zero is what arrayEnd()/mapEnd() write).
void BinaryEncoder::setItemCount(size_t count) {
    if (count != 0) {
        doEncodeLong(count);
        return;
    }
    throw Exception("Count cannot be zero");
}
// Items need no per-item marker in this encoder.
void BinaryEncoder::startItem() {
    // intentionally empty
}
// Writes the selected union branch index via doEncodeLong().
void BinaryEncoder::encodeUnionIndex(size_t e) {
    doEncodeLong(e);
}
// Reports the byte count tracked by the internal writer.
int64_t BinaryEncoder::byteCount() const {
    return out_.byteCount();
}
// Encodes `l` into a 10-byte scratch buffer with encodeInt64() and writes
// the number of bytes that call reports.
void BinaryEncoder::doEncodeLong(int64_t l) {
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
    std::array<uint8_t, 10> scratch;
    auto used = encodeInt64(l, scratch);
    out_.writeBytes(scratch.data(), used);
}
} // namespace avro

View File

@ -1,539 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <boost/algorithm/string/replace.hpp>
#include <sstream>
#include <utility>
#include "Compiler.hh"
#include "Schema.hh"
#include "Stream.hh"
#include "Types.hh"
#include "ValidSchema.hh"
#include "json/JsonDom.hh"
using std::make_pair;
using std::map;
using std::pair;
using std::string;
using std::vector;
namespace avro {
using json::Array;
using json::Entity;
using json::EntityType;
using json::Object;
using SymbolTable = map<Name, NodePtr>;
// #define DEBUG_VERBOSE
// Maps a primitive type name to a new NodePrimitive.
// Returns an empty NodePtr when `t` is not one of the eight primitive names.
static NodePtr makePrimitive(const string &t) {
    static const pair<const char *, Type> kPrimitives[] = {
        {"null", AVRO_NULL},
        {"boolean", AVRO_BOOL},
        {"int", AVRO_INT},
        {"long", AVRO_LONG},
        {"float", AVRO_FLOAT},
        {"double", AVRO_DOUBLE},
        {"string", AVRO_STRING},
        {"bytes", AVRO_BYTES},
    };
    for (const auto &entry : kPrimitives) {
        if (t == entry.first) {
            return NodePtr(new NodePrimitive(entry.second));
        }
    }
    return NodePtr();
}
static NodePtr makeNode(const json::Entity &e, SymbolTable &st, const string &ns);
// Wraps a single value in a concepts::SingleAttribute<T>.
template<typename T>
concepts::SingleAttribute<T> asSingleAttribute(const T &t) {
    concepts::SingleAttribute<T> wrapped;
    wrapped.add(t);
    return wrapped;
}
// A name is treated as fully qualified when it contains at least one '.'.
static bool isFullName(const string &s) {
    const auto dotPos = s.find('.');
    return dotPos != string::npos;
}
// Builds a Name from `name`; the namespace `ns` is applied only when `name`
// is not already fully qualified.
static Name getName(const string &name, const string &ns) {
    if (isFullName(name)) {
        return Name(name);
    }
    return Name(name, ns);
}
// Resolves a type given by name: first as a primitive, otherwise as a
// reference to a previously registered named type in the symbol table.
// Throws Exception when the name is neither.
static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns) {
    NodePtr primitive = makePrimitive(t);
    if (primitive) {
        return primitive;
    }
    Name n = getName(t, ns);
    auto entry = st.find(n);
    if (entry == st.end()) {
        throw Exception(boost::format("Unknown type: %1%") % n.fullname());
    }
    return NodePtr(new NodeSymbolic(asSingleAttribute(n), entry->second));
}
/** Returns true when the JSON object has a member named `fieldName`. */
// Used to probe for optional members before reading them.
bool containsField(const Object &m, const string &fieldName) {
    return m.find(fieldName) != m.end();
}
json::Object::const_iterator findField(const Entity &e,
const Object &m, const string &fieldName);
// Verifies that JSON entity `e` has the JSON type corresponding to T.
// Throws Exception naming the field, the expected type and the entity otherwise.
template<typename T>
void ensureType(const Entity &e, const string &name) {
    if (e.type() == json::type_traits<T>::type()) {
        return;
    }
    throw Exception(boost::format("Json field \"%1%\" is not a %2%: %3%") % name % json::type_traits<T>::name() % e.toString());
}
// Returns the string value of required member `fieldName`.
// findField() throws when it is missing; ensureType() throws when it is not a string.
string getStringField(const Entity &e, const Object &m,
                      const string &fieldName) {
    auto member = findField(e, m, fieldName);
    const Entity &value = member->second;
    ensureType<string>(value, fieldName);
    return value.stringValue();
}
const Array &getArrayField(const Entity &e, const Object &m,
const string &fieldName);
int64_t getLongField(const Entity &e, const Object &m,
const string &fieldName) {
auto it = findField(e, m, fieldName);
ensureType<int64_t>(it->second, fieldName);
return it->second.longValue();
}
// Replaces every backslash-escaped double quote (\") in `s` with a plain
// double quote, in place. Counterpart of NodeImpl::escape(), which is used
// when serializing.
static void unescape(string &s) {
    boost::replace_all(s, "\\\"", "\"");
}
// Reads the required "doc" member and returns it with escaped quotes restored.
string getDocField(const Entity &e, const Object &m) {
    string text = getStringField(e, m, "doc");
    unescape(text);
    return text;
}
// Immutable bundle describing one record field: its name, its schema node
// and its default value.
struct Field {
    const string name;
    const NodePtr schema;
    const GenericDatum defaultValue;

    // Arguments are taken by value and moved into the members.
    Field(string n, NodePtr v, GenericDatum dv)
        : name(std::move(n)),
          schema(std::move(v)),
          defaultValue(std::move(dv)) {}
};
// Checks that a default-value entity has the expected JSON type.
// Throws Exception with expected type, actual type and source line otherwise.
static void assertType(const Entity &e, EntityType et) {
    if (e.type() == et) {
        return;
    }
    throw Exception(boost::format("Unexpected type for default value: "
                                  "Expected %1%, but found %2% in line %3%")
                    % json::typeToString(et) % json::typeToString(e.type()) % e.line());
}
// Copies the characters of `s` into a byte vector of the same length.
static vector<uint8_t> toBin(const string &s) {
    return vector<uint8_t>(s.begin(), s.end());
}
// Converts the JSON default value `e` into a GenericDatum shaped by schema
// node `n`. Symbolic nodes are resolved through the symbol table first.
// Records, arrays and maps recurse into their leaf schemas.
// Throws Exception (via assertType) when the JSON type does not match the
// schema type, when a record default lacks a field, or for an unknown type.
static GenericDatum makeGenericDatum(NodePtr n,
const Entity &e, const SymbolTable &st) {
Type t = n->type();
EntityType dt = e.type();
if (t == AVRO_SYMBOLIC) {
// NOTE(review): the result of find() is dereferenced without an end() check;
// this relies on every symbolic node's name being present in `st`.
n = st.find(n->name())->second;
t = n->type();
}
switch (t) {
case AVRO_STRING:
assertType(e, json::EntityType::String);
return GenericDatum(e.stringValue());
case AVRO_BYTES:
assertType(e, json::EntityType::String);
return GenericDatum(toBin(e.bytesValue()));
case AVRO_INT:
assertType(e, json::EntityType::Long);
// NOTE(review): narrowing cast with no range check on the JSON value.
return GenericDatum(static_cast<int32_t>(e.longValue()));
case AVRO_LONG:
assertType(e, json::EntityType::Long);
return GenericDatum(e.longValue());
case AVRO_FLOAT:
// An integer JSON literal is accepted as a float default.
if (dt == json::EntityType::Long) {
return GenericDatum(static_cast<float>(e.longValue()));
}
assertType(e, json::EntityType::Double);
return GenericDatum(static_cast<float>(e.doubleValue()));
case AVRO_DOUBLE:
// An integer JSON literal is accepted as a double default.
if (dt == json::EntityType::Long) {
return GenericDatum(static_cast<double>(e.longValue()));
}
assertType(e, json::EntityType::Double);
return GenericDatum(e.doubleValue());
case AVRO_BOOL:
assertType(e, json::EntityType::Bool);
return GenericDatum(e.boolValue());
case AVRO_NULL:
assertType(e, json::EntityType::Null);
return GenericDatum();
case AVRO_RECORD: {
assertType(e, json::EntityType::Obj);
GenericRecord result(n);
const map<string, Entity> &v = e.objectValue();
// Every field of the record schema must have a value in the JSON object.
for (size_t i = 0; i < n->leaves(); ++i) {
auto it = v.find(n->nameAt(i));
if (it == v.end()) {
throw Exception(boost::format(
"No value found in default for %1%")
% n->nameAt(i));
}
result.setFieldAt(i,
makeGenericDatum(n->leafAt(i), it->second, st));
}
return GenericDatum(n, result);
}
case AVRO_ENUM:
assertType(e, json::EntityType::String);
return GenericDatum(n, GenericEnum(n, e.stringValue()));
case AVRO_ARRAY: {
assertType(e, json::EntityType::Arr);
GenericArray result(n);
const vector<Entity> &elements = e.arrayValue();
// Leaf 0 is used as the element schema.
for (const auto &element : elements) {
result.value().push_back(makeGenericDatum(n->leafAt(0), element, st));
}
return GenericDatum(n, result);
}
case AVRO_MAP: {
assertType(e, json::EntityType::Obj);
GenericMap result(n);
const map<string, Entity> &v = e.objectValue();
// Leaf 1 is used as the value schema.
for (const auto &it : v) {
result.value().push_back(make_pair(it.first,
makeGenericDatum(n->leafAt(1), it.second, st)));
}
return GenericDatum(n, result);
}
case AVRO_UNION: {
// The default is always interpreted against the first branch of the union.
GenericUnion result(n);
result.selectBranch(0);
result.datum() = makeGenericDatum(n->leafAt(0), e, st);
return GenericDatum(n, result);
}
case AVRO_FIXED:
assertType(e, json::EntityType::String);
return GenericDatum(n, GenericFixed(n, toBin(e.bytesValue())));
default: throw Exception(boost::format("Unknown type: %1%") % t);
}
}
// Builds a Field from one element of a record's "fields" array: required
// "name" and "type", optional "doc" (attached to the schema node) and
// optional "default" (converted with makeGenericDatum).
static Field makeField(const Entity &e, SymbolTable &st, const string &ns) {
    const Object &m = e.objectValue();
    const string &n = getStringField(e, m, "name");
    auto typeIt = findField(e, m, "type");
    auto defaultIt = m.find("default");
    NodePtr node = makeNode(typeIt->second, st, ns);
    if (containsField(m, "doc")) {
        node->setDoc(getDocField(e, m));
    }
    GenericDatum d;
    if (defaultIt != m.end()) {
        d = makeGenericDatum(node, defaultIt->second, st);
    }
    return Field(n, node, d);
}
// Builds a NodeRecord from the "fields" array of `m`.
// `doc` may be nullptr, in which case the record is created without a doc
// attribute.
static NodePtr makeRecordNode(const Entity &e, const Name &name,
                              const string *doc, const Object &m,
                              SymbolTable &st, const string &ns) {
    const Array &fields = getArrayField(e, m, "fields");
    concepts::MultiAttribute<string> fieldNames;
    concepts::MultiAttribute<NodePtr> fieldValues;
    vector<GenericDatum> defaultValues;
    for (const auto &fieldEntity : fields) {
        Field f = makeField(fieldEntity, st, ns);
        fieldNames.add(f.name);
        fieldValues.add(f.schema);
        defaultValues.push_back(f.defaultValue);
    }
    if (doc != nullptr) {
        return NodePtr(new NodeRecord(asSingleAttribute(name), asSingleAttribute(*doc),
                                      fieldValues, fieldNames, defaultValues));
    }
    return NodePtr(new NodeRecord(asSingleAttribute(name), fieldValues, fieldNames,
                                  defaultValues));
}
// Derives the LogicalType from the optional "logicalType" member of `m`.
// Returns NONE when the member is absent, names an unrecognised type, or
// (for "decimal") when reading precision/scale throws an Exception.
static LogicalType makeLogicalType(const Entity &e, const Object &m) {
    if (!containsField(m, "logicalType")) {
        return LogicalType(LogicalType::NONE);
    }
    const std::string &typeField = getStringField(e, m, "logicalType");
    if (typeField == "decimal") {
        LogicalType decimalType(LogicalType::DECIMAL);
        try {
            decimalType.setPrecision(getLongField(e, m, "precision"));
            if (containsField(m, "scale")) {
                decimalType.setScale(getLongField(e, m, "scale"));
            }
        } catch (Exception &ex) {
            // If any part of the logical type is malformed, per the standard we
            // must ignore the whole attribute.
            return LogicalType(LogicalType::NONE);
        }
        return decimalType;
    }
    // Logical types that carry no extra attributes.
    static const pair<const char *, LogicalType::Type> kSimpleTypes[] = {
        {"date", LogicalType::DATE},
        {"time-millis", LogicalType::TIME_MILLIS},
        {"time-micros", LogicalType::TIME_MICROS},
        {"timestamp-millis", LogicalType::TIMESTAMP_MILLIS},
        {"timestamp-micros", LogicalType::TIMESTAMP_MICROS},
        {"duration", LogicalType::DURATION},
        {"uuid", LogicalType::UUID},
    };
    for (const auto &entry : kSimpleTypes) {
        if (typeField == entry.first) {
            return LogicalType(entry.second);
        }
    }
    return LogicalType(LogicalType::NONE);
}
// Builds a NodeEnum from the "symbols" array of `m`, attaching "doc" if present.
// Throws Exception when any symbol is not a JSON string.
static NodePtr makeEnumNode(const Entity &e,
                            const Name &name, const Object &m) {
    const Array &symbolEntities = getArrayField(e, m, "symbols");
    concepts::MultiAttribute<string> symbols;
    for (const auto &sym : symbolEntities) {
        const bool isString = (sym.type() == json::EntityType::String);
        if (!isString) {
            throw Exception(boost::format("Enum symbol not a string: %1%") % sym.toString());
        }
        symbols.add(sym.stringValue());
    }
    NodePtr node(new NodeEnum(asSingleAttribute(name), symbols));
    if (containsField(m, "doc")) {
        node->setDoc(getDocField(e, m));
    }
    return node;
}
// Builds a NodeFixed from the required "size" member of `m`, attaching "doc"
// if present.
// Throws Exception when the size is not positive, or when it does not fit in
// an int. Validation is done on the 64-bit value read from JSON, so an
// oversized value cannot wrap around into an apparently valid small size.
static NodePtr makeFixedNode(const Entity &e,
                             const Name &name, const Object &m) {
    const int64_t declared = getLongField(e, m, "size");
    if (declared <= 0) {
        throw Exception(boost::format("Size for fixed is not positive: %1%") % e.toString());
    }
    int v = static_cast<int>(declared);
    // Round-trip check: a value that changes when narrowed is out of int range.
    if (static_cast<int64_t>(v) != declared) {
        throw Exception(boost::format("Size for fixed is too large: %1%") % e.toString());
    }
    NodePtr node =
        NodePtr(new NodeFixed(asSingleAttribute(name), asSingleAttribute(v)));
    if (containsField(m, "doc")) {
        node->setDoc(getDocField(e, m));
    }
    return node;
}
// Builds a NodeArray whose element schema comes from the required "items"
// member, attaching "doc" if present.
static NodePtr makeArrayNode(const Entity &e, const Object &m,
                             SymbolTable &st, const string &ns) {
    auto itemsIt = findField(e, m, "items");
    NodePtr node(new NodeArray(
        asSingleAttribute(makeNode(itemsIt->second, st, ns))));
    if (containsField(m, "doc")) {
        node->setDoc(getDocField(e, m));
    }
    return node;
}
// Builds a NodeMap whose value schema comes from the required "values"
// member, attaching "doc" if present.
static NodePtr makeMapNode(const Entity &e, const Object &m,
                           SymbolTable &st, const string &ns) {
    auto valuesIt = findField(e, m, "values");
    NodePtr node(new NodeMap(
        asSingleAttribute(makeNode(valuesIt->second, st, ns))));
    if (containsField(m, "doc")) {
        node->setDoc(getDocField(e, m));
    }
    return node;
}
// Computes the Name of a named type definition.
// Precedence: a dotted "name" is used as-is; otherwise an explicit
// "namespace" member qualifies it; otherwise the enclosing namespace `ns`.
// Throws Exception when "namespace" is present but not a string.
static Name getName(const Entity &e, const Object &m, const string &ns) {
    const string &name = getStringField(e, m, "name");
    if (isFullName(name)) {
        return Name(name);
    }
    auto nsIt = m.find("namespace");
    if (nsIt == m.end()) {
        return Name(name, ns);
    }
    if (nsIt->second.type() != json::type_traits<string>::type()) {
        throw Exception(boost::format(
                            "Json field \"%1%\" is not a %2%: %3%")
                        % "namespace" % json::type_traits<string>::name() % nsIt->second.toString());
    }
    return Name(name, nsIt->second.stringValue());
}
// Builds a schema node from a JSON object definition, dispatching on "type".
// Named types (record/error/enum/fixed) are registered in the symbol table.
// Throws Exception when "type" is not a known definition.
static NodePtr makeNode(const Entity &e, const Object &m,
                        SymbolTable &st, const string &ns) {
    const string &type = getStringField(e, m, "type");
    const bool isRecord = (type == "record" || type == "error");
    NodePtr result;
    if (isRecord || type == "enum" || type == "fixed") {
        Name nm = getName(e, m, ns);
        if (isRecord) {
            // Register an empty placeholder first so that fields can refer
            // back to this record by name; the real content is swapped in
            // once the fields have been built.
            result = NodePtr(new NodeRecord());
            st[nm] = result;
            string doc;
            const string *docPtr = nullptr;
            if (containsField(m, "doc")) {
                doc = getDocField(e, m);
                docPtr = &doc;
            }
            NodePtr r = makeRecordNode(e, nm, docPtr, m, st, nm.ns());
            (std::dynamic_pointer_cast<NodeRecord>(r))
                ->swap(*std::dynamic_pointer_cast<NodeRecord>(result));
        } else {
            result = (type == "enum") ? makeEnumNode(e, nm, m) : makeFixedNode(e, nm, m);
            st[nm] = result;
        }
    } else if (type == "array") {
        result = makeArrayNode(e, m, st, ns);
    } else if (type == "map") {
        result = makeMapNode(e, m, st, ns);
    } else {
        result = makePrimitive(type);
    }
    if (!result) {
        throw Exception(boost::format("Unknown type definition: %1%")
                        % e.toString());
    }
    try {
        result->setLogicalType(makeLogicalType(e, m));
    } catch (Exception &ex) {
        // Per the standard we must ignore the logical type attribute if it
        // is malformed.
    }
    return result;
}
// Builds a NodeUnion from a JSON array; each element becomes one branch.
static NodePtr makeNode(const Entity &e, const Array &m,
                        SymbolTable &st, const string &ns) {
    concepts::MultiAttribute<NodePtr> branches;
    for (const auto &branchEntity : m) {
        NodePtr branch = makeNode(branchEntity, st, ns);
        branches.add(branch);
    }
    return NodePtr(new NodeUnion(branches));
}
// Top-level dispatcher: a JSON string is a type name, an object is a type
// definition, an array is a union. Anything else throws Exception.
static NodePtr makeNode(const json::Entity &e, SymbolTable &st, const string &ns) {
    const auto kind = e.type();
    if (kind == json::EntityType::String) {
        return makeNode(e.stringValue(), st, ns);
    }
    if (kind == json::EntityType::Obj) {
        return makeNode(e, e.objectValue(), st, ns);
    }
    if (kind == json::EntityType::Arr) {
        return makeNode(e, e.arrayValue(), st, ns);
    }
    throw Exception(boost::format("Invalid Avro type: %1%") % e.toString());
}
// Looks up required member `fieldName` in `m`.
// Throws Exception naming the field and the enclosing entity when it is absent.
json::Object::const_iterator findField(const Entity &e, const Object &m, const string &fieldName) {
    auto found = m.find(fieldName);
    if (found != m.end()) {
        return found;
    }
    throw Exception(boost::format("Missing Json field \"%1%\": %2%") % fieldName % e.toString());
}
// Returns the array value of required member `fieldName`.
// findField() throws when it is missing; ensureType() throws when it is not an array.
const Array &getArrayField(const Entity &e, const Object &m, const string &fieldName) {
    auto member = findField(e, m, fieldName);
    const Entity &value = member->second;
    ensureType<Array>(value, fieldName);
    return value.arrayValue();
}
// Parses JSON from `is`, builds the schema tree with a fresh symbol table and
// an empty default namespace, and wraps the root in a ValidSchema.
ValidSchema compileJsonSchemaFromStream(InputStream &is) {
    json::Entity root = json::loadEntity(is);
    SymbolTable symbols;
    NodePtr rootNode = makeNode(root, symbols, "");
    return ValidSchema(rootNode);
}
// Compiles the JSON schema stored in the file at `filename`.
AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char *filename) {
    std::unique_ptr<InputStream> fileStream = fileInputStream(filename);
    return compileJsonSchemaFromStream(*fileStream);
}
// Compiles a JSON schema held in the `len` bytes starting at `input`.
AVRO_DECL ValidSchema compileJsonSchemaFromMemory(const uint8_t *input, size_t len) {
    auto memStream = memoryInputStream(input, len);
    return compileJsonSchemaFromStream(*memStream);
}
// Compiles a JSON schema from a NUL-terminated C string.
AVRO_DECL ValidSchema compileJsonSchemaFromString(const char *input) {
    const auto *bytes = reinterpret_cast<const uint8_t *>(input);
    return compileJsonSchemaFromMemory(bytes, ::strlen(input));
}
// Compiles a JSON schema from the bytes of a std::string.
AVRO_DECL ValidSchema compileJsonSchemaFromString(const string &input) {
    const auto *bytes = reinterpret_cast<const uint8_t *>(input.data());
    return compileJsonSchemaFromMemory(bytes, input.size());
}
// Adapts a std::istream to an InputStream and compiles the schema from it.
static ValidSchema compile(std::istream &is) {
    std::unique_ptr<InputStream> adapted = istreamInputStream(is);
    return compileJsonSchemaFromStream(*adapted);
}
// Compiles the schema read from `is` into `schema`.
// Throws Exception when the stream is not in a good state, or when
// compilation fails.
void compileJsonSchema(std::istream &is, ValidSchema &schema) {
    const bool usable = is.good();
    if (!usable) {
        throw Exception("Input stream is not good");
    }
    schema = compile(is);
}
// Non-throwing variant for avro::Exception: returns true on success; on
// Exception stores its message in `error` and returns false.
AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, string &error) {
    try {
        compileJsonSchema(is, schema);
    } catch (const Exception &e) {
        error = e.what();
        return false;
    }
    return true;
}
} // namespace avro

View File

@ -1,563 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "DataFile.hh"
#include "Compiler.hh"
#include "Exception.hh"
#include <sstream>
#include <boost/crc.hpp> // for boost::crc_32_type
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filter/zlib.hpp>
#include <boost/random/mersenne_twister.hpp>
#ifdef SNAPPY_CODEC_AVAILABLE
#include <snappy.h>
#endif
namespace avro {
using std::copy;
using std::istringstream;
using std::ostringstream;
using std::string;
using std::unique_ptr;
using std::vector;
using std::array;
namespace {
const string AVRO_SCHEMA_KEY("avro.schema");
const string AVRO_CODEC_KEY("avro.codec");
const string AVRO_NULL_CODEC("null");
const string AVRO_DEFLATE_CODEC("deflate");
#ifdef SNAPPY_CODEC_AVAILABLE
const string AVRO_SNAPPY_CODEC = "snappy";
#endif
const size_t minSyncInterval = 32;
const size_t maxSyncInterval = 1u << 30;
// zlib parameters shared by the deflate writer and reader paths:
// deflated method with the zlib header suppressed.
boost::iostreams::zlib_params get_zlib_params() {
    boost::iostreams::zlib_params params;
    params.method = boost::iostreams::zlib::deflated;
    params.noheader = true;
    return params;
}
} // namespace
// Creates a writer that outputs to the file at `filename`.
// Opens the output stream, generates a sync marker, then delegates to init(),
// which validates the arguments and writes the header.
DataFileWriterBase::DataFileWriterBase(const char *filename,
                                       const ValidSchema &schema,
                                       size_t syncInterval,
                                       Codec codec)
    : filename_(filename),
      schema_(schema),
      encoderPtr_(binaryEncoder()),
      syncInterval_(syncInterval),
      codec_(codec),
      stream_(fileOutputStream(filename)),
      buffer_(memoryOutputStream()),
      sync_(makeSync()),
      objectCount_(0),
      lastSync_(0) {
    init(schema, syncInterval, codec);
}
// Creates a writer that outputs to a caller-supplied stream (ownership is
// taken). filename_ is left empty. Delegates to init(), which validates the
// arguments and writes the header.
DataFileWriterBase::DataFileWriterBase(std::unique_ptr<OutputStream> outputStream,
                                       const ValidSchema &schema,
                                       size_t syncInterval,
                                       Codec codec)
    : filename_(),
      schema_(schema),
      encoderPtr_(binaryEncoder()),
      syncInterval_(syncInterval),
      codec_(codec),
      stream_(std::move(outputStream)),
      buffer_(memoryOutputStream()),
      sync_(makeSync()),
      objectCount_(0),
      lastSync_(0) {
    init(schema, syncInterval, codec);
}
// Shared constructor tail: validates the sync interval, records codec and
// schema in the file metadata, writes the header, and points the encoder at
// the in-memory block buffer.
// Throws Exception for a sync interval outside [minSyncInterval,
// maxSyncInterval] or for an unsupported codec.
void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, const Codec &codec) {
    if (syncInterval < minSyncInterval || syncInterval > maxSyncInterval) {
        throw Exception(boost::format("Invalid sync interval: %1%. "
                                      "Should be between %2% and %3%")
                        % syncInterval % minSyncInterval % maxSyncInterval);
    }
    // The chain below sets the codec key for every accepted codec, so no
    // unconditional pre-set is needed.
    if (codec_ == NULL_CODEC) {
        setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC);
    } else if (codec_ == DEFLATE_CODEC) {
        setMetadata(AVRO_CODEC_KEY, AVRO_DEFLATE_CODEC);
#ifdef SNAPPY_CODEC_AVAILABLE
    } else if (codec_ == SNAPPY_CODEC) {
        setMetadata(AVRO_CODEC_KEY, AVRO_SNAPPY_CODEC);
#endif
    } else {
        throw Exception(boost::format("Unknown codec: %1%") % codec);
    }
    setMetadata(AVRO_SCHEMA_KEY, schema.toJson(false));
    writeHeader();
    // From here on, objects are encoded into the block buffer, not the file.
    encoderPtr_->init(*buffer_);
    lastSync_ = stream_->byteCount();
}
// Closes the writer if it has not been closed already (stream_ still set).
// NOTE(review): close() flushes and may throw from within the destructor.
DataFileWriterBase::~DataFileWriterBase() {
    if (!stream_) {
        return;
    }
    close();
}
// Writes out any buffered block and releases the output stream.
void DataFileWriterBase::close() {
    flush();
    stream_.reset();
}
// Writes the currently buffered objects to the output stream as one block:
// object count, byte count, (optionally compressed) payload, then the sync
// marker. Afterwards the buffer and object count are reset.
void DataFileWriterBase::sync() {
// Finish writing into the buffer, then retarget the encoder at the file.
encoderPtr_->flush();
encoderPtr_->init(*stream_);
avro::encode(*encoderPtr_, objectCount_);
if (codec_ == NULL_CODEC) {
// Uncompressed: byte count is the buffer size; payload is copied verbatim.
int64_t byteCount = buffer_->byteCount();
avro::encode(*encoderPtr_, byteCount);
encoderPtr_->flush();
std::unique_ptr<InputStream> in = memoryInputStream(*buffer_);
copy(*in, *stream_);
} else if (codec_ == DEFLATE_CODEC) {
// Deflate: compress the buffer into `buf` first so its size is known.
std::vector<char> buf;
{
boost::iostreams::filtering_ostream os;
os.push(boost::iostreams::zlib_compressor(get_zlib_params()));
os.push(boost::iostreams::back_inserter(buf));
const uint8_t *data;
size_t len;
std::unique_ptr<InputStream> input = memoryInputStream(*buffer_);
while (input->next(&data, &len)) {
boost::iostreams::write(os, reinterpret_cast<const char *>(data), len);
}
} // make sure all is flushed
std::unique_ptr<InputStream> in = memoryInputStream(
reinterpret_cast<const uint8_t *>(buf.data()), buf.size());
int64_t byteCount = buf.size();
avro::encode(*encoderPtr_, byteCount);
encoderPtr_->flush();
copy(*in, *stream_);
#ifdef SNAPPY_CODEC_AVAILABLE
} else if (codec_ == SNAPPY_CODEC) {
// Snappy: payload is the compressed data followed by a 4-byte CRC32 of the
// uncompressed data, most significant byte first.
std::vector<char> temp;
std::string compressed;
boost::crc_32_type crc;
{
// Gather the uncompressed buffer contents into `temp`.
boost::iostreams::filtering_ostream os;
os.push(boost::iostreams::back_inserter(temp));
const uint8_t *data;
size_t len;
std::unique_ptr<InputStream> input = memoryInputStream(*buffer_);
while (input->next(&data, &len)) {
boost::iostreams::write(os, reinterpret_cast<const char *>(data),
len);
}
} // make sure all is flushed
crc.process_bytes(reinterpret_cast<const char *>(temp.data()),
temp.size());
// For Snappy, add the CRC32 checksum
int32_t checksum = crc();
// Now compress
size_t compressed_size = snappy::Compress(
reinterpret_cast<const char *>(temp.data()), temp.size(),
&compressed);
// `temp` is reused to hold the compressed bytes plus checksum.
temp.clear();
{
boost::iostreams::filtering_ostream os;
os.push(boost::iostreams::back_inserter(temp));
boost::iostreams::write(os, compressed.c_str(), compressed_size);
}
temp.push_back((checksum >> 24) & 0xFF);
temp.push_back((checksum >> 16) & 0xFF);
temp.push_back((checksum >> 8) & 0xFF);
temp.push_back(checksum & 0xFF);
std::unique_ptr<InputStream> in = memoryInputStream(
reinterpret_cast<const uint8_t *>(temp.data()), temp.size());
int64_t byteCount = temp.size();
avro::encode(*encoderPtr_, byteCount);
encoderPtr_->flush();
copy(*in, *stream_);
#endif
}
// Block trailer: the sync marker.
encoderPtr_->init(*stream_);
avro::encode(*encoderPtr_, sync_);
encoderPtr_->flush();
lastSync_ = stream_->byteCount();
// Start a fresh, empty block buffer.
buffer_ = memoryOutputStream();
encoderPtr_->init(*buffer_);
objectCount_ = 0;
}
// Flushes the encoder and emits a block once the buffered bytes reach the
// configured sync interval.
void DataFileWriterBase::syncIfNeeded() {
    encoderPtr_->flush();
    const bool blockFull = buffer_->byteCount() >= syncInterval_;
    if (blockFull) {
        sync();
    }
}
// Returns the output-stream byte count recorded after the most recent sync
// marker (or after the header, before any block has been written).
uint64_t DataFileWriterBase::getCurrentBlockStart() const {
    return lastSync_;
}
// Forces the buffered objects out as a block, regardless of buffer size.
void DataFileWriterBase::flush() {
    sync();
}
boost::mt19937 random(static_cast<uint32_t>(time(nullptr)));
// Produces a sync marker filled from the file-scope `random` generator.
// NOTE(review): std::generate receives the generator by value, so the
// file-scope generator's state is not advanced by this call; confirm that
// is intended.
DataFileSync DataFileWriterBase::makeSync() {
    DataFileSync marker;
    std::generate(marker.begin(), marker.end(), random);
    return marker;
}
typedef array<uint8_t, 4> Magic;
static Magic magic = {{'O', 'b', 'j', '\x01'}};
// Writes the file header directly to the output stream, in order:
// magic bytes, metadata map, sync marker.
void DataFileWriterBase::writeHeader() {
    encoderPtr_->init(*stream_);
    auto &enc = *encoderPtr_;
    avro::encode(enc, magic);
    avro::encode(enc, metadata_);
    avro::encode(enc, sync_);
    encoderPtr_->flush();
}
// Stores `value` under `key` in the metadata map as raw bytes, replacing any
// existing entry.
void DataFileWriterBase::setMetadata(const string &key, const string &value) {
    metadata_[key] = vector<uint8_t>(value.begin(), value.end());
}
// Opens the file at `filename` as a seekable stream and reads the header.
// The codec starts as NULL_CODEC and is updated by readHeader().
DataFileReaderBase::DataFileReaderBase(const char *filename)
    : filename_(filename),
      codec_(NULL_CODEC),
      stream_(fileSeekableInputStream(filename)),
      decoder_(binaryDecoder()),
      objectCount_(0),
      eof_(false),
      blockStart_(-1),
      blockEnd_(-1) {
    readHeader();
}
// Reads from a caller-supplied stream (ownership is taken) and reads the
// header. filename_ is left default-constructed.
// blockStart_ and blockEnd_ are initialised to -1, matching the filename
// constructor, so both constructors leave the object in the same state.
DataFileReaderBase::DataFileReaderBase(std::unique_ptr<InputStream> inputStream)
    : codec_(NULL_CODEC),
      stream_(std::move(inputStream)),
      decoder_(binaryDecoder()),
      objectCount_(0),
      eof_(false),
      blockStart_(-1),
      blockEnd_(-1) {
    readHeader();
}
// Prepares for reading with the file's own schema as the reader schema and
// loads the first data block.
void DataFileReaderBase::init() {
    readerSchema_ = dataSchema_;
    dataDecoder_ = binaryDecoder();
    readDataBlock();
}
// Prepares for reading with an explicit reader schema and loads the first
// data block. A resolving decoder is used only when the reader schema's JSON
// differs from the file schema's JSON.
void DataFileReaderBase::init(const ValidSchema &readerSchema) {
    readerSchema_ = readerSchema;
    const bool schemasDiffer = readerSchema_.toJson(true) != dataSchema_.toJson(true);
    if (schemasDiffer) {
        dataDecoder_ = resolvingDecoder(dataSchema_, readerSchema_, binaryDecoder());
    } else {
        dataDecoder_ = binaryDecoder();
    }
    readDataBlock();
}
// Consumes `in` until next() reports no more data; the data is discarded.
static void drain(InputStream &in) {
    const uint8_t *chunk = nullptr;
    size_t chunkLen = 0;
    while (in.next(&chunk, &chunkLen)) {
        // discard
    }
}
// Returns the lowercase hexadecimal digit for `x`: '0'..'9' for values below
// 10, and 'a' + (x - 10) otherwise. No upper bound is enforced.
char hex(unsigned int x) {
    const int base = (x < 10) ? '0' : ('a' - 10);
    return static_cast<char>(x + base);
}
// Prints each byte of the sync marker as two lowercase hex digits followed by
// a space, then ends the line (std::endl also flushes).
std::ostream &operator<<(std::ostream &os, const DataFileSync &s) {
    for (uint8_t byte : s) {
        const char high = hex(byte / 16);
        const char low = hex(byte % 16);
        os << high << low << ' ';
    }
    os << std::endl;
    return os;
}
bool DataFileReaderBase::hasMore() {
for (;;) {
if (eof_) {
return false;
} else if (objectCount_ != 0) {
return true;
}
dataDecoder_->init(*dataStream_);
drain(*dataStream_);
DataFileSync s;
decoder_->init(*stream_);
avro::decode(*decoder_, s);
if (s != sync_) {
throw Exception("Sync mismatch");
}
readDataBlock();
}
}
// InputStream wrapper that exposes at most `limit` bytes of an underlying
// stream. The overrides are declared before `public:` and are therefore
// private to this class; only the constructor is public.
class BoundedInputStream : public InputStream {
    InputStream &in_;
    size_t limit_; // bytes still allowed to be handed out

    // Hands out the next chunk, trimmed to the remaining limit. Any excess
    // the underlying stream produced is pushed back onto it.
    bool next(const uint8_t **data, size_t *len) final {
        if (limit_ == 0 || !in_.next(data, len)) {
            return false;
        }
        if (*len > limit_) {
            in_.backup(*len - limit_);
            *len = limit_;
        }
        limit_ -= *len;
        return true;
    }

    // Returns `len` bytes to the underlying stream and to the limit.
    void backup(size_t len) final {
        in_.backup(len);
        limit_ += len;
    }

    // Skips up to `len` bytes, never more than the remaining limit.
    void skip(size_t len) final {
        const size_t step = (len > limit_) ? limit_ : len;
        in_.skip(step);
        limit_ -= step;
    }

    // Byte count is that of the underlying stream.
    size_t byteCount() const final {
        return in_.byteCount();
    }

public:
    BoundedInputStream(InputStream &in, size_t limit) : in_(in), limit_(limit) {}
};
// Factory: wraps `in` in a BoundedInputStream limited to `limit` bytes.
// The returned stream refers to `in`; it does not own it.
unique_ptr<InputStream> boundedInputStream(InputStream &in, size_t limit) {
    unique_ptr<InputStream> bounded(new BoundedInputStream(in, limit));
    return bounded;
}
void DataFileReaderBase::readDataBlock() {
decoder_->init(*stream_);
blockStart_ = stream_->byteCount();
const uint8_t *p = nullptr;
size_t n = 0;
if (!stream_->next(&p, &n)) {
eof_ = true;
return;
}
stream_->backup(n);
avro::decode(*decoder_, objectCount_);
int64_t byteCount;
avro::decode(*decoder_, byteCount);
decoder_->init(*stream_);
blockEnd_ = stream_->byteCount() + byteCount;
unique_ptr<InputStream> st = boundedInputStream(*stream_, static_cast<size_t>(byteCount));
if (codec_ == NULL_CODEC) {
dataDecoder_->init(*st);
dataStream_ = std::move(st);
#ifdef SNAPPY_CODEC_AVAILABLE
} else if (codec_ == SNAPPY_CODEC) {
boost::crc_32_type crc;
uint32_t checksum = 0;
compressed_.clear();
uncompressed.clear();
const uint8_t *data;
size_t len;
while (st->next(&data, &len)) {
compressed_.insert(compressed_.end(), data, data + len);
}
len = compressed_.size();
int b1 = compressed_[len - 4] & 0xFF;
int b2 = compressed_[len - 3] & 0xFF;
int b3 = compressed_[len - 2] & 0xFF;
int b4 = compressed_[len - 1] & 0xFF;
checksum = (b1 << 24) + (b2 << 16) + (b3 << 8) + (b4);
if (!snappy::Uncompress(reinterpret_cast<const char *>(compressed_.data()),
len - 4, &uncompressed)) {
throw Exception(
"Snappy Compression reported an error when decompressing");
}
crc.process_bytes(uncompressed.c_str(), uncompressed.size());
uint32_t c = crc();
if (checksum != c) {
throw Exception(
boost::format("Checksum did not match for Snappy compression: Expected: %1%, computed: %2%") % checksum
% c);
}
os_.reset(new boost::iostreams::filtering_istream());
os_->push(
boost::iostreams::basic_array_source<char>(uncompressed.c_str(),
uncompressed.size()));
std::unique_ptr<InputStream> in = istreamInputStream(*os_);
dataDecoder_->init(*in);
dataStream_ = std::move(in);
#endif
} else {
compressed_.clear();
const uint8_t *data;
size_t len;
while (st->next(&data, &len)) {
compressed_.insert(compressed_.end(), data, data + len);
}
os_.reset(new boost::iostreams::filtering_istream());
os_->push(boost::iostreams::zlib_decompressor(get_zlib_params()));
os_->push(boost::iostreams::basic_array_source<char>(
compressed_.data(), compressed_.size()));
std::unique_ptr<InputStream> in = nonSeekableIstreamInputStream(*os_);
dataDecoder_->init(*in);
dataStream_ = std::move(in);
}
}
// Currently a no-op; nothing is released here.
void DataFileReaderBase::close() {
    // intentionally empty
}
// Builds a string with the same length and byte values as `v`.
static string toString(const vector<uint8_t> &v) {
    return string(v.begin(), v.end());
}
// Compiles the JSON schema text held in `v` and returns the resulting
// ValidSchema.
static ValidSchema makeSchema(const vector<uint8_t> &v) {
    istringstream jsonInput(toString(v));
    ValidSchema compiled;
    compileJsonSchema(jsonInput, compiled);
    return ValidSchema(compiled);
}
void DataFileReaderBase::readHeader() {
decoder_->init(*stream_);
Magic m;
avro::decode(*decoder_, m);
if (magic != m) {
throw Exception("Invalid data file. Magic does not match: "
+ filename_);
}
avro::decode(*decoder_, metadata_);
Metadata::const_iterator it = metadata_.find(AVRO_SCHEMA_KEY);
if (it == metadata_.end()) {
throw Exception("No schema in metadata");
}
dataSchema_ = makeSchema(it->second);
if (!readerSchema_.root()) {
readerSchema_ = dataSchema();
}
it = metadata_.find(AVRO_CODEC_KEY);
if (it != metadata_.end() && toString(it->second) == AVRO_DEFLATE_CODEC) {
codec_ = DEFLATE_CODEC;
#ifdef SNAPPY_CODEC_AVAILABLE
} else if (it != metadata_.end()
&& toString(it->second) == AVRO_SNAPPY_CODEC) {
codec_ = SNAPPY_CODEC;
#endif
} else {
codec_ = NULL_CODEC;
if (it != metadata_.end() && toString(it->second) != AVRO_NULL_CODEC) {
throw Exception("Unknown codec in data file: " + toString(it->second));
}
}
avro::decode(*decoder_, sync_);
decoder_->init(*stream_);
blockStart_ = stream_->byteCount();
}
void DataFileReaderBase::doSeek(int64_t position) {
if (auto *ss = dynamic_cast<SeekableInputStream *>(stream_.get())) {
if (!eof_) {
dataDecoder_->init(*dataStream_);
drain(*dataStream_);
}
decoder_->init(*stream_);
ss->seek(position);
eof_ = false;
} else {
throw Exception("seek not supported on non-SeekableInputStream");
}
}
// Seeks the underlying stream to `position` (via doSeek) and then reads the
// data block found there.
void DataFileReaderBase::seek(int64_t position) {
doSeek(position);
readDataBlock();
}
// Seeks to `position`, scans forward for the file's sync marker (sync_), and
// reads the data block that follows it.
//
// sync_buffer is used as a circular window over the last SyncSize bytes read
// from stream_. If the stream ends before a marker is found, eof_ is set and
// the function returns without reading a block.
void DataFileReaderBase::sync(int64_t position) {
doSeek(position);
DataFileSync sync_buffer;
// p/n track the unread remainder of the chunk last returned by next().
const uint8_t *p = nullptr;
size_t n = 0;
size_t i = 0;
// Phase 1: fill the window with the first SyncSize bytes.
while (i < SyncSize) {
if (n == 0 && !stream_->next(&p, &n)) {
eof_ = true;
return;
}
int len =
std::min(static_cast<size_t>(SyncSize - i), n);
memcpy(&sync_buffer[i], p, len);
p += len;
n -= len;
i += len;
}
// Phase 2: slide the window one byte at a time until it equals sync_.
for (;;) {
size_t j = 0;
// i % SyncSize is the slot holding the oldest byte, so the comparison
// starts there and wraps around.
for (; j < SyncSize; ++j) {
if (sync_[j] != sync_buffer[(i + j) % SyncSize]) {
break;
}
}
if (j == SyncSize) {
// Found the sync marker!
break;
}
if (n == 0 && !stream_->next(&p, &n)) {
eof_ = true;
return;
}
// Overwrite the oldest byte in the window with the next input byte.
sync_buffer[i++ % SyncSize] = *p++;
--n;
}
// Return the unread tail of the current chunk to the stream before the
// block is decoded.
stream_->backup(n);
readDataBlock();
}
// Returns true when there is no more data, or when the current block starts
// at or beyond `position + SyncSize`.
bool DataFileReaderBase::pastSync(int64_t position) {
    if (!hasMore()) {
        return true;
    }
    return blockStart_ >= position + SyncSize;
}
// Returns blockStart_, the stream byte count recorded for the start of the
// current block.
int64_t DataFileReaderBase::previousSync() const {
return blockStart_;
}
} // namespace avro

View File

@ -1,375 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Stream.hh"
#include <fstream>
#ifndef _WIN32
#include "fcntl.h"
#include "unistd.h"
#include <cerrno>
#ifndef O_BINARY
#define O_BINARY 0
#endif
#else
#include "Windows.h"
#ifdef min
#undef min
#endif
#endif
using std::istream;
using std::ostream;
using std::unique_ptr;
namespace avro {
namespace {
// Abstract byte source used by BufferCopyInInputStream to refill its buffer.
struct BufferCopyIn {
virtual ~BufferCopyIn() = default;
// Advances the source by `len` bytes relative to its current position.
virtual void seek(size_t len) = 0;
// Reads up to `toRead` bytes into `b` and stores the number read in
// `actual`. A false return means no data was produced.
virtual bool read(uint8_t *b, size_t toRead, size_t &actual) = 0;
};
// BufferCopyIn implementation that reads from a file opened by name, using
// the Win32 API on Windows and POSIX file descriptors elsewhere.
struct FileBufferCopyIn : public BufferCopyIn {
#ifdef _WIN32
    HANDLE h_;
    // NOTE(review): OPEN_ALWAYS creates the file when it does not exist;
    // OPEN_EXISTING may have been intended for a read-only source — confirm.
    FileBufferCopyIn(const char *filename) : h_(::CreateFileA(filename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) {
        if (h_ == INVALID_HANDLE_VALUE) {
            throw Exception(boost::format("Cannot open file: %1%") % ::GetLastError());
        }
    }
    ~FileBufferCopyIn() {
        ::CloseHandle(h_);
    }
    // Moves the file pointer forward by `len` bytes from its current position.
    void seek(size_t len) {
        if (::SetFilePointer(h_, len, NULL, FILE_CURRENT) == INVALID_SET_FILE_POINTER && ::GetLastError() != NO_ERROR) {
            throw Exception(boost::format("Cannot skip file: %1%") % ::GetLastError());
        }
    }
    // Reads up to `toRead` bytes; returns false when zero bytes were read.
    bool read(uint8_t *b, size_t toRead, size_t &actual) {
        DWORD dw = 0;
        if (!::ReadFile(h_, b, toRead, &dw, NULL)) {
            throw Exception(boost::format("Cannot read file: %1%") % ::GetLastError());
        }
        actual = static_cast<size_t>(dw);
        return actual != 0;
    }
#else
    const int fd_;
    // Opens `filename` read-only; throws Exception when open() fails.
    explicit FileBufferCopyIn(const char *filename) : fd_(open(filename, O_RDONLY | O_BINARY)) {
        if (fd_ < 0) {
            throw Exception(boost::format("Cannot open file: %1%") % ::strerror(errno));
        }
    }
    ~FileBufferCopyIn() override {
        ::close(fd_);
    }
    // Moves the file offset forward by `len` bytes from its current position.
    void seek(size_t len) final {
        off_t r = ::lseek(fd_, len, SEEK_CUR);
        if (r == static_cast<off_t>(-1)) {
            throw Exception(boost::format("Cannot skip file: %1%") % strerror(errno));
        }
    }
    // Reads up to `toRead` bytes into `b`.
    //
    // Returns true and sets `actual` when at least one byte was read, false
    // at end of file. Throws Exception on a read error, matching the Windows
    // branch, instead of reporting the error as end-of-file.
    bool read(uint8_t *b, size_t toRead, size_t &actual) final {
        // read() returns ssize_t; keeping the full width avoids truncating
        // large counts.
        ssize_t n;
        do {
            n = ::read(fd_, b, toRead);
        } while (n < 0 && errno == EINTR); // retry calls interrupted by a signal
        if (n < 0) {
            throw Exception(boost::format("Cannot read file: %1%") % ::strerror(errno));
        }
        if (n == 0) {
            return false;
        }
        actual = static_cast<size_t>(n);
        return true;
    }
#endif
};
// BufferCopyIn implementation backed by a caller-owned std::istream.
struct IStreamBufferCopyIn : public BufferCopyIn {
    istream &is_;

    explicit IStreamBufferCopyIn(istream &is) : is_(is) {}

    // Seeks `len` bytes forward from the current get position; throws
    // Exception when the stream reports failure afterwards.
    void seek(size_t len) override {
        is_.seekg(len, std::ios_base::cur);
        if (is_.fail()) {
            throw Exception("Cannot skip stream");
        }
    }

    // Reads up to `toRead` bytes into `b` and stores the count in `actual`.
    // Returns false when the stream is bad, or when it is at eof and nothing
    // was read.
    bool read(uint8_t *b, size_t toRead, size_t &actual) override {
        char *const dest = reinterpret_cast<char *>(b);
        is_.read(dest, toRead);
        if (is_.bad()) {
            return false;
        }
        actual = static_cast<size_t>(is_.gcount());
        const bool exhausted = is_.eof() && actual == 0;
        return !exhausted;
    }
};
// IStreamBufferCopyIn variant for streams that cannot seekg(): skipping is
// done by reading and discarding bytes.
struct NonSeekableIStreamBufferCopyIn : public IStreamBufferCopyIn {
    explicit NonSeekableIStreamBufferCopyIn(istream &is) : IStreamBufferCopyIn(is) {}

    // Discards exactly `len` bytes from the stream.
    //
    // Throws Exception when the stream goes bad or yields no more data
    // before `len` bytes have been consumed.
    void seek(size_t len) final {
        const size_t bufSize = 4096;
        uint8_t buf[bufSize];
        while (len > 0) {
            const size_t want = std::min(len, bufSize);
            is_.read(reinterpret_cast<char *>(buf), want);
            if (is_.bad()) {
                throw Exception("Cannot skip stream");
            }
            const auto actual = static_cast<size_t>(is_.gcount());
            // A read that made no progress (eof or failed stream) can never
            // complete the skip; fail instead of looping or silently stopping.
            if (actual == 0) {
                throw Exception("Cannot skip stream");
            }
            // Count only bytes really consumed, so a short final read is not
            // reported as a successful skip.
            len -= actual;
        }
    }
};
} // namespace
// SeekableInputStream that serves data out of a fixed-size buffer which is
// refilled from a BufferCopyIn source.
class BufferCopyInInputStream : public SeekableInputStream {
    const size_t bufferSize_;
    uint8_t *const buffer_;
    unique_ptr<BufferCopyIn> in_;
    // Bytes handed out so far (reduced by backup()).
    size_t byteCount_;
    // First byte in buffer_ not yet handed out.
    uint8_t *next_;
    // Bytes remaining in buffer_ starting at next_.
    size_t available_;

    // Hands out all currently buffered bytes, refilling first when the buffer
    // is empty. Returns false when the refill produces nothing.
    bool next(const uint8_t **data, size_t *size) final {
        const bool haveData = available_ != 0 || fill();
        if (!haveData) {
            return false;
        }
        const size_t handedOut = available_;
        *data = next_;
        *size = handedOut;
        next_ += handedOut;
        byteCount_ += handedOut;
        available_ = 0;
        return true;
    }

    // Returns the last `len` handed-out bytes to the buffer.
    void backup(size_t len) final {
        byteCount_ -= len;
        available_ += len;
        next_ -= len;
    }

    // Skips `len` bytes: buffered bytes are consumed first, and any
    // remainder is skipped directly on the source.
    void skip(size_t len) final {
        const size_t fromBuffer = std::min(available_, len);
        available_ -= fromBuffer;
        next_ += fromBuffer;
        byteCount_ += fromBuffer;
        len -= fromBuffer;

        if (len > 0) {
            in_->seek(len);
            byteCount_ += len;
        }
    }

    size_t byteCount() const final { return byteCount_; }

    // Refills buffer_ from the source; returns false when the source reports
    // no data.
    bool fill() {
        size_t got = 0;
        if (!in_->read(buffer_, bufferSize_, got)) {
            return false;
        }
        next_ = buffer_;
        available_ = got;
        return true;
    }

    void seek(int64_t position) final {
        // BufferCopyIn::seek is relative to byteCount_, whereas position is
        // absolute.
        in_->seek(position - byteCount_ - available_);
        byteCount_ = position;
        available_ = 0;
    }

public:
    BufferCopyInInputStream(unique_ptr<BufferCopyIn> in, size_t bufferSize)
        : bufferSize_(bufferSize),
          buffer_(new uint8_t[bufferSize]),
          in_(std::move(in)),
          byteCount_(0),
          next_(buffer_),
          available_(0) {}

    ~BufferCopyInInputStream() override {
        delete[] buffer_;
    }
};
namespace {
// Abstract byte sink used by BufferCopyOutputStream when it flushes.
struct BufferCopyOut {
virtual ~BufferCopyOut() = default;
// Writes the `len` bytes starting at `b` to the sink.
virtual void write(const uint8_t *b, size_t len) = 0;
};
// BufferCopyOut implementation that writes to a file opened by name, using
// the Win32 API on Windows and POSIX file descriptors elsewhere.
struct FileBufferCopyOut : public BufferCopyOut {
#ifdef _WIN32
    HANDLE h_;
    FileBufferCopyOut(const char *filename) : h_(::CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) {
        if (h_ == INVALID_HANDLE_VALUE) {
            throw Exception(boost::format("Cannot open file: %1%") % ::GetLastError());
        }
    }
    ~FileBufferCopyOut() {
        ::CloseHandle(h_);
    }
    // Writes all `len` bytes, looping over partial writes.
    void write(const uint8_t *b, size_t len) {
        while (len > 0) {
            DWORD dw = 0;
            if (!::WriteFile(h_, b, len, &dw, NULL)) {
                // The message previously said "read" although this is the
                // write path.
                throw Exception(boost::format("Cannot write file: %1%") % ::GetLastError());
            }
            b += dw;
            len -= dw;
        }
    }
#else
    const int fd_;
    // Opens (creating or truncating) `filename` for writing with mode 0644;
    // throws Exception when open() fails.
    explicit FileBufferCopyOut(const char *filename) : fd_(::open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644)) {
        if (fd_ < 0) {
            throw Exception(boost::format("Cannot open file: %1%") % ::strerror(errno));
        }
    }
    ~FileBufferCopyOut() override {
        ::close(fd_);
    }
    // Writes all `len` bytes starting at `b`.
    //
    // write() may accept fewer bytes than requested, so this loops until
    // everything is written, as the Windows branch does. Throws Exception on
    // a write error.
    void write(const uint8_t *b, size_t len) final {
        while (len > 0) {
            const ssize_t written = ::write(fd_, b, len);
            if (written < 0) {
                if (errno == EINTR) {
                    continue; // interrupted by a signal before writing: retry
                }
                throw Exception(boost::format("Cannot write file: %1%") % ::strerror(errno));
            }
            b += written;
            len -= static_cast<size_t>(written);
        }
    }
#endif
};
// BufferCopyOut implementation that forwards bytes to a caller-owned
// std::ostream.
struct OStreamBufferCopyOut : public BufferCopyOut {
ostream &os_;
explicit OStreamBufferCopyOut(ostream &os) : os_(os) {
}
// Writes `len` bytes from `b` to os_; the stream state is not checked here.
void write(const uint8_t *b, size_t len) final {
os_.write(reinterpret_cast<const char *>(b), len);
}
};
} // namespace
// OutputStream that accumulates bytes in a fixed-size buffer and hands the
// filled part to a BufferCopyOut sink on flush().
class BufferCopyOutputStream : public OutputStream {
    size_t bufferSize_;
    uint8_t *const buffer_;
    unique_ptr<BufferCopyOut> out_;
    // First free byte in buffer_.
    uint8_t *next_;
    // Free bytes remaining in buffer_ starting at next_.
    size_t available_;
    size_t byteCount_;

    // Invariant: byteCount_ == bytesWritten + bufferSize_ - available_;

    // Hands the whole free region of the buffer to the caller, flushing
    // first when no free space is left. Always returns true.
    bool next(uint8_t **data, size_t *len) final {
        if (available_ == 0) {
            flush();
        }
        uint8_t *const region = next_;
        const size_t regionLen = available_;
        next_ += regionLen;
        byteCount_ += regionLen;
        available_ = 0;
        *data = region;
        *len = regionLen;
        return true;
    }

    // Gives back the last `len` bytes of the region handed out by next().
    void backup(size_t len) final {
        next_ -= len;
        byteCount_ -= len;
        available_ += len;
    }

    uint64_t byteCount() const final {
        return byteCount_;
    }

    // Writes the used part of the buffer to the sink and marks the whole
    // buffer as free again.
    void flush() final {
        const size_t used = bufferSize_ - available_;
        out_->write(buffer_, used);
        next_ = buffer_;
        available_ = bufferSize_;
    }

public:
    BufferCopyOutputStream(unique_ptr<BufferCopyOut> out, size_t bufferSize)
        : bufferSize_(bufferSize),
          buffer_(new uint8_t[bufferSize]),
          out_(std::move(out)),
          next_(buffer_),
          available_(bufferSize_),
          byteCount_(0) {}

    ~BufferCopyOutputStream() override {
        delete[] buffer_;
    }
};
// Creates a buffered InputStream that reads from the named file, using a
// buffer of `bufferSize` bytes.
unique_ptr<InputStream> fileInputStream(const char *filename,
                                        size_t bufferSize) {
    unique_ptr<BufferCopyIn> source(new FileBufferCopyIn(filename));
    unique_ptr<InputStream> stream(
        new BufferCopyInInputStream(std::move(source), bufferSize));
    return stream;
}
// Creates a buffered SeekableInputStream that reads from the named file,
// using a buffer of `bufferSize` bytes.
unique_ptr<SeekableInputStream> fileSeekableInputStream(const char *filename,
                                                        size_t bufferSize) {
    unique_ptr<BufferCopyIn> source(new FileBufferCopyIn(filename));
    unique_ptr<SeekableInputStream> stream(
        new BufferCopyInInputStream(std::move(source), bufferSize));
    return stream;
}
// Creates a buffered InputStream over `is`; skipping uses seekg() on the
// stream. The stream is held by reference.
unique_ptr<InputStream> istreamInputStream(istream &is, size_t bufferSize) {
    unique_ptr<BufferCopyIn> source(new IStreamBufferCopyIn(is));
    unique_ptr<InputStream> stream(
        new BufferCopyInInputStream(std::move(source), bufferSize));
    return stream;
}
// Creates a buffered InputStream over `is` for streams that cannot seek;
// skipping is done by reading and discarding. The stream is held by
// reference.
unique_ptr<InputStream> nonSeekableIstreamInputStream(
    istream &is, size_t bufferSize) {
    unique_ptr<BufferCopyIn> source(new NonSeekableIStreamBufferCopyIn(is));
    unique_ptr<InputStream> stream(
        new BufferCopyInInputStream(std::move(source), bufferSize));
    return stream;
}
// Creates a buffered OutputStream that writes to the named file, using a
// buffer of `bufferSize` bytes.
unique_ptr<OutputStream> fileOutputStream(const char *filename,
                                          size_t bufferSize) {
    unique_ptr<BufferCopyOut> sink(new FileBufferCopyOut(filename));
    unique_ptr<OutputStream> stream(
        new BufferCopyOutputStream(std::move(sink), bufferSize));
    return stream;
}
// Creates a buffered OutputStream that writes to `os`. The stream is held by
// reference.
unique_ptr<OutputStream> ostreamOutputStream(ostream &os,
                                             size_t bufferSize) {
    unique_ptr<BufferCopyOut> sink(new OStreamBufferCopyOut(os));
    unique_ptr<OutputStream> stream(
        new BufferCopyOutputStream(std::move(sink), bufferSize));
    return stream;
}
} // namespace avro

View File

@ -1,228 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Generic.hh"
#include <utility>
namespace avro {
using std::ostringstream;
using std::string;
using std::vector;
typedef vector<uint8_t> bytes;
// Verifies that `schema` has the given Avro type.
//
// Throws Exception naming both the schema's type and the requested type when
// they differ.
void GenericContainer::assertType(const NodePtr &schema, Type type) {
    if (schema->type() != type) {
        // Boost.Format positional directives are written %N%; the previous
        // "%1" / "%2" lacked the closing '%' and were not valid directives.
        throw Exception(boost::format("Schema type %1% expected %2%") % toString(schema->type()) % toString(type));
    }
}
// Constructs a reader for schema `s` on `decoder`. isResolving_ is set when
// the decoder object is a ResolvingDecoder.
GenericReader::GenericReader(ValidSchema s, const DecoderPtr &decoder) : schema_(std::move(s)), isResolving_(dynamic_cast<ResolvingDecoder *>(&(*decoder)) != nullptr),
decoder_(decoder) {
}
// Constructs a reader that wraps `decoder` in a resolving decoder built from
// `writerSchema` and `readerSchema`; values are produced against
// `readerSchema`.
GenericReader::GenericReader(const ValidSchema &writerSchema,
const ValidSchema &readerSchema, const DecoderPtr &decoder) : schema_(readerSchema),
isResolving_(true),
decoder_(resolvingDecoder(writerSchema, readerSchema, decoder)) {
}
// Resets `datum` to a fresh value for this reader's schema root and then
// decodes one value into it from the reader's decoder.
void GenericReader::read(GenericDatum &datum) const {
datum = GenericDatum(schema_.root());
read(datum, *decoder_, isResolving_);
}
// Decodes one value from `d` into `datum`, dispatching on datum.type() and
// recursing into records, arrays and maps.
//
// datum       - destination; for a union the branch is selected first from
//               the decoded union index.
// d           - decoder to read from.
// isResolving - when true, `d` is treated as a ResolvingDecoder and record
//               fields are read in the order given by its fieldOrder().
//
// Throws Exception for a type not handled by the switch.
void GenericReader::read(GenericDatum &datum, Decoder &d, bool isResolving) {
if (datum.isUnion()) {
datum.selectBranch(d.decodeUnionIndex());
}
switch (datum.type()) {
case AVRO_NULL:
d.decodeNull();
break;
case AVRO_BOOL:
datum.value<bool>() = d.decodeBool();
break;
case AVRO_INT:
datum.value<int32_t>() = d.decodeInt();
break;
case AVRO_LONG:
datum.value<int64_t>() = d.decodeLong();
break;
case AVRO_FLOAT:
datum.value<float>() = d.decodeFloat();
break;
case AVRO_DOUBLE:
datum.value<double>() = d.decodeDouble();
break;
case AVRO_STRING:
d.decodeString(datum.value<string>());
break;
case AVRO_BYTES:
d.decodeBytes(datum.value<bytes>());
break;
case AVRO_FIXED: {
auto &f = datum.value<GenericFixed>();
d.decodeFixed(f.schema()->fixedSize(), f.value());
} break;
case AVRO_RECORD: {
auto &r = datum.value<GenericRecord>();
size_t c = r.schema()->leaves();
if (isResolving) {
// The field order is copied into a local vector before the fields are
// read recursively with the same decoder.
std::vector<size_t> fo =
static_cast<ResolvingDecoder &>(d).fieldOrder();
for (size_t i = 0; i < c; ++i) {
read(r.fieldAt(fo[i]), d, isResolving);
}
} else {
for (size_t i = 0; i < c; ++i) {
read(r.fieldAt(i), d, isResolving);
}
}
} break;
case AVRO_ENUM:
datum.value<GenericEnum>().set(d.decodeEnum());
break;
case AVRO_ARRAY: {
auto &v = datum.value<GenericArray>();
vector<GenericDatum> &r = v.value();
const NodePtr &nn = v.schema()->leafAt(0);
r.resize(0);
// `start` is the index of the next element to decode; each block of m
// items grows the vector by m before they are read.
size_t start = 0;
for (size_t m = d.arrayStart(); m != 0; m = d.arrayNext()) {
r.resize(r.size() + m);
for (; start < r.size(); ++start) {
r[start] = GenericDatum(nn);
read(r[start], d, isResolving);
}
}
} break;
case AVRO_MAP: {
auto &v = datum.value<GenericMap>();
GenericMap::Value &r = v.value();
// Leaf 1 of the map schema is used as the value schema.
const NodePtr &nn = v.schema()->leafAt(1);
r.resize(0);
size_t start = 0;
for (size_t m = d.mapStart(); m != 0; m = d.mapNext()) {
r.resize(r.size() + m);
for (; start < r.size(); ++start) {
d.decodeString(r[start].first);
r[start].second = GenericDatum(nn);
read(r[start].second, d, isResolving);
}
}
} break;
default:
throw Exception(boost::format("Unknown schema type %1%") % toString(datum.type()));
}
}
// Resets `g` to a fresh value for schema `s`, then decodes one value from `d`
// into it.
void GenericReader::read(Decoder &d, GenericDatum &g, const ValidSchema &s) {
g = GenericDatum(s);
read(d, g);
}
// Decodes one value from `d` into `g`, which must already be shaped for the
// expected schema. Resolving mode is chosen by checking whether `d` is a
// ResolvingDecoder.
void GenericReader::read(Decoder &d, GenericDatum &g) {
    const bool resolving = dynamic_cast<ResolvingDecoder *>(&d) != nullptr;
    read(g, d, resolving);
}
// Constructs a writer that stores schema `s` and writes through `encoder`.
GenericWriter::GenericWriter(ValidSchema s, EncoderPtr encoder) : schema_(std::move(s)), encoder_(std::move(encoder)) {
}
// Encodes `datum` with this writer's encoder.
void GenericWriter::write(const GenericDatum &datum) const {
write(datum, *encoder_);
}
// Encodes `datum` to `e`, dispatching on datum.type() and recursing into
// records, arrays and maps.
//
// For a union, the branch index is written before the selected value.
// Throws Exception for a type not handled by the switch.
void GenericWriter::write(const GenericDatum &datum, Encoder &e) {
if (datum.isUnion()) {
e.encodeUnionIndex(datum.unionBranch());
}
switch (datum.type()) {
case AVRO_NULL:
e.encodeNull();
break;
case AVRO_BOOL:
e.encodeBool(datum.value<bool>());
break;
case AVRO_INT:
e.encodeInt(datum.value<int32_t>());
break;
case AVRO_LONG:
e.encodeLong(datum.value<int64_t>());
break;
case AVRO_FLOAT:
e.encodeFloat(datum.value<float>());
break;
case AVRO_DOUBLE:
e.encodeDouble(datum.value<double>());
break;
case AVRO_STRING:
e.encodeString(datum.value<string>());
break;
case AVRO_BYTES:
e.encodeBytes(datum.value<bytes>());
break;
case AVRO_FIXED:
e.encodeFixed(datum.value<GenericFixed>().value());
break;
case AVRO_RECORD: {
// Fields are written in schema (index) order.
const auto &r = datum.value<GenericRecord>();
size_t c = r.schema()->leaves();
for (size_t i = 0; i < c; ++i) {
write(r.fieldAt(i), e);
}
} break;
case AVRO_ENUM:
e.encodeEnum(datum.value<GenericEnum>().value());
break;
case AVRO_ARRAY: {
const GenericArray::Value &r = datum.value<GenericArray>().value();
e.arrayStart();
// All items go out as a single counted block; an empty array writes no
// block between arrayStart() and arrayEnd().
if (!r.empty()) {
e.setItemCount(r.size());
for (const auto &it : r) {
e.startItem();
write(it, e);
}
}
e.arrayEnd();
} break;
case AVRO_MAP: {
const GenericMap::Value &r = datum.value<GenericMap>().value();
e.mapStart();
if (!r.empty()) {
e.setItemCount(r.size());
for (const auto &it : r) {
e.startItem();
// Each entry is written as its key string followed by its value.
e.encodeString(it.first);
write(it.second, e);
}
}
e.mapEnd();
} break;
default:
throw Exception(boost::format("Unknown schema type %1%") % toString(datum.type()));
}
}
// Argument-order convenience overload: encodes `g` to `e`.
void GenericWriter::write(Encoder &e, const GenericDatum &g) {
write(g, e);
}
} // namespace avro

View File

@ -1,98 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "GenericDatum.hh"
#include "NodeImpl.hh"
using std::string;
using std::vector;
namespace avro {
// Constructs a datum for the root node of `schema`: records the root's type
// and logical type, then initialises the stored value via init().
GenericDatum::GenericDatum(const ValidSchema &schema) : type_(schema.root()->type()),
logicalType_(schema.root()->logicalType()) {
init(schema.root());
}
// Constructs a datum for the schema node `schema`: records its type and
// logical type, then initialises the stored value via init().
GenericDatum::GenericDatum(const NodePtr &schema) : type_(schema->type()),
logicalType_(schema->logicalType()) {
init(schema);
}
// Initialises value_ with a default-constructed value matching type_.
//
// A symbolic node is first resolved to the node it refers to, and type_ and
// logicalType_ are updated from the resolved node. AVRO_NULL leaves value_
// untouched. Throws Exception for a type not handled by the switch.
void GenericDatum::init(const NodePtr &schema) {
NodePtr sc = schema;
if (type_ == AVRO_SYMBOLIC) {
sc = resolveSymbol(schema);
type_ = sc->type();
logicalType_ = sc->logicalType();
}
switch (type_) {
case AVRO_NULL: break;
case AVRO_BOOL:
value_ = bool();
break;
case AVRO_INT:
value_ = int32_t();
break;
case AVRO_LONG:
value_ = int64_t();
break;
case AVRO_FLOAT:
value_ = float();
break;
case AVRO_DOUBLE:
value_ = double();
break;
case AVRO_STRING:
value_ = string();
break;
case AVRO_BYTES:
value_ = vector<uint8_t>();
break;
// The container types below are constructed from the (resolved) schema node.
case AVRO_FIXED:
value_ = GenericFixed(sc);
break;
case AVRO_RECORD:
value_ = GenericRecord(sc);
break;
case AVRO_ENUM:
value_ = GenericEnum(sc);
break;
case AVRO_ARRAY:
value_ = GenericArray(sc);
break;
case AVRO_MAP:
value_ = GenericMap(sc);
break;
case AVRO_UNION:
value_ = GenericUnion(sc);
break;
default:
throw Exception(boost::format("Unknown schema type %1%") % toString(type_));
}
}
// Constructs a record for `schema`, creating one default-initialised
// GenericDatum per leaf of the schema, in leaf order.
GenericRecord::GenericRecord(const NodePtr &schema) : GenericContainer(AVRO_RECORD, schema) {
    const size_t fieldCount = schema->leaves();
    fields_.resize(fieldCount);
    for (size_t idx = 0; idx < fieldCount; ++idx) {
        fields_[idx] = GenericDatum(schema->leafAt(idx));
    }
}
// Constructs a fixed value for `schema` holding a copy of the bytes in `v`.
GenericFixed::GenericFixed(const NodePtr &schema, const vector<uint8_t> &v) : GenericContainer(AVRO_FIXED, schema), value_(v) {}
} // namespace avro

View File

@ -1,83 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "LogicalType.hh"
#include "Exception.hh"
namespace avro {
// Constructs a logical type of the given kind with precision and scale both
// set to 0.
LogicalType::LogicalType(Type type)
: type_(type), precision_(0), scale_(0) {}
// Returns the kind of this logical type.
LogicalType::Type LogicalType::type() const {
return type_;
}
// Sets the precision of a DECIMAL logical type.
//
// Throws Exception when this is not a DECIMAL, or when `precision` is not
// strictly positive.
void LogicalType::setPrecision(int precision) {
    const bool isDecimal = (type_ == DECIMAL);
    if (!isDecimal) {
        throw Exception("Only logical type DECIMAL can have precision");
    }
    const bool positive = precision > 0;
    if (!positive) {
        throw Exception(boost::format("Precision cannot be: %1%") % precision);
    }
    precision_ = precision;
}
// Sets the scale of a DECIMAL logical type.
//
// Throws Exception when this is not a DECIMAL, or when `scale` is negative.
void LogicalType::setScale(int scale) {
    const bool isDecimal = (type_ == DECIMAL);
    if (!isDecimal) {
        throw Exception("Only logical type DECIMAL can have scale");
    }
    const bool nonNegative = scale >= 0;
    if (!nonNegative) {
        throw Exception(boost::format("Scale cannot be: %1%") % scale);
    }
    scale_ = scale;
}
// Writes this logical type's JSON attributes to `os` (no surrounding braces).
// NONE writes nothing; DECIMAL additionally writes precision and scale.
void LogicalType::printJson(std::ostream &os) const {
    switch (type_) {
        case LogicalType::NONE:
            break;
        case LogicalType::DECIMAL:
            os << R"("logicalType": "decimal")"
               << ", \"precision\": " << precision_
               << ", \"scale\": " << scale_;
            break;
        case LogicalType::DATE:
            os << R"("logicalType": "date")";
            break;
        case LogicalType::TIME_MILLIS:
            os << R"("logicalType": "time-millis")";
            break;
        case LogicalType::TIME_MICROS:
            os << R"("logicalType": "time-micros")";
            break;
        case LogicalType::TIMESTAMP_MILLIS:
            os << R"("logicalType": "timestamp-millis")";
            break;
        case LogicalType::TIMESTAMP_MICROS:
            os << R"("logicalType": "timestamp-micros")";
            break;
        case LogicalType::DURATION:
            os << R"("logicalType": "duration")";
            break;
        case LogicalType::UUID:
            os << R"("logicalType": "uuid")";
            break;
    }
}
} // namespace avro

View File

@ -1,150 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include "Node.hh"
namespace avro {
using std::string;
// Out-of-line defaulted destructor.
Node::~Node() = default;
// Constructs a Name from a (possibly dotted) full name; splitting and
// validation are done by fullname(name).
Name::Name(const std::string &name) {
fullname(name);
}
// Returns the full name: the simple name alone when there is no namespace,
// otherwise "<namespace>.<simple name>".
string Name::fullname() const {
    if (ns_.empty()) {
        return simpleName_;
    }
    return ns_ + "." + simpleName_;
}
void Name::fullname(const string &name) {
string::size_type n = name.find_last_of('.');
if (n == string::npos) {
simpleName_ = name;
ns_.clear();
} else {
ns_ = name.substr(0, n);
simpleName_ = name.substr(n + 1);
}
check();
}
// Orders names lexicographically by namespace first, then by simple name.
bool Name::operator<(const Name &n) const {
    if (ns_ < n.ns_) {
        return true;
    }
    if (n.ns_ < ns_) {
        return false;
    }
    return simpleName_ < n.simpleName_;
}
// Returns true when `c` is not allowed in a namespace: anything other than
// an alphanumeric character, '_', '.' or '$'.
static bool invalidChar1(char c) {
    // isalnum() requires a value representable as unsigned char (or EOF);
    // passing a negative char is undefined behaviour.
    return !isalnum(static_cast<unsigned char>(c)) && c != '_' && c != '.' && c != '$';
}
// Returns true when `c` is not allowed in a simple name: anything other than
// an alphanumeric character or '_'.
static bool invalidChar2(char c) {
    // isalnum() requires a value representable as unsigned char (or EOF);
    // passing a negative char is undefined behaviour.
    return !isalnum(static_cast<unsigned char>(c)) && c != '_';
}
void Name::check() const {
if (!ns_.empty() && (ns_[0] == '.' || ns_[ns_.size() - 1] == '.' || std::find_if(ns_.begin(), ns_.end(), invalidChar1) != ns_.end())) {
throw Exception("Invalid namespace: " + ns_);
}
if (simpleName_.empty()
|| std::find_if(simpleName_.begin(), simpleName_.end(), invalidChar2) != simpleName_.end()) {
throw Exception("Invalid name: " + simpleName_);
}
}
// Two names are equal when both namespace and simple name match.
bool Name::operator==(const Name &n) const {
    if (!(ns_ == n.ns_)) {
        return false;
    }
    return simpleName_ == n.simpleName_;
}
// Attaches `logicalType` to this node after checking that it is compatible
// with the node's Avro type.
//
// checkLock() is called first. Throws Exception when the logical type cannot
// annotate this node's type, when a DECIMAL precision does not fit the FIXED
// size, or when a DECIMAL scale exceeds its precision. logicalType_ is only
// assigned when every check passes.
void Node::setLogicalType(LogicalType logicalType) {
checkLock();
// Check that the logical type is applicable to the node type.
switch (logicalType.type()) {
case LogicalType::NONE: break;
case LogicalType::DECIMAL: {
if (type_ != AVRO_BYTES && type_ != AVRO_FIXED) {
throw Exception("DECIMAL logical type can annotate "
"only BYTES or FIXED type");
}
if (type_ == AVRO_FIXED) {
// Max precision that can be supported by the current size of
// the FIXED type.
// (8 * fixedSize() - 1) is the bit count minus one; multiplying by
// log10(2) converts that to a number of decimal digits.
long maxPrecision = floor(log10(2.0) * (8.0 * fixedSize() - 1));
if (logicalType.precision() > maxPrecision) {
throw Exception(
boost::format(
"DECIMAL precision %1% is too large for the "
"FIXED type of size %2%, precision cannot be "
"larger than %3%")
% logicalType.precision() % fixedSize() % maxPrecision);
}
}
if (logicalType.scale() > logicalType.precision()) {
throw Exception("DECIMAL scale cannot exceed precision");
}
break;
}
case LogicalType::DATE:
if (type_ != AVRO_INT) {
throw Exception("DATE logical type can only annotate INT type");
}
break;
case LogicalType::TIME_MILLIS:
if (type_ != AVRO_INT) {
throw Exception("TIME-MILLIS logical type can only annotate "
"INT type");
}
break;
case LogicalType::TIME_MICROS:
if (type_ != AVRO_LONG) {
throw Exception("TIME-MICROS logical type can only annotate "
"LONG type");
}
break;
case LogicalType::TIMESTAMP_MILLIS:
if (type_ != AVRO_LONG) {
throw Exception("TIMESTAMP-MILLIS logical type can only annotate "
"LONG type");
}
break;
case LogicalType::TIMESTAMP_MICROS:
if (type_ != AVRO_LONG) {
throw Exception("TIMESTAMP-MICROS logical type can only annotate "
"LONG type");
}
break;
case LogicalType::DURATION:
if (type_ != AVRO_FIXED || fixedSize() != 12) {
throw Exception("DURATION logical type can only annotate "
"FIXED type of size 12");
}
break;
case LogicalType::UUID:
if (type_ != AVRO_STRING) {
throw Exception("UUID logical type can only annotate "
"STRING type");
}
break;
}
logicalType_ = logicalType;
}
} // namespace avro

View File

@ -1,540 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "NodeImpl.hh"
#include <sstream>
#include <utility>
using std::string;
namespace avro {
namespace {
// Escapes `unescaped` so it can be embedded inside a JSON string literal.
//
// Backslash, double quote and '/' are prefixed with a backslash; backspace,
// form feed, newline, carriage return and tab use their two-character
// escapes; any other control character is emitted via intToHex(). All other
// characters are copied unchanged.
string escape(const string &unescaped) {
    string s;
    s.reserve(unescaped.length());
    for (char c : unescaped) {
        switch (c) {
            case '\\':
            case '"':
            case '/':
                s += '\\';
                s += c;
                break;
            case '\b':
                s += '\\';
                s += 'b';
                break;
            case '\f':
                // Previously a raw form feed was appended, which is not a
                // legal character inside a JSON string; emit the escape.
                s += '\\';
                s += 'f';
                break;
            case '\n':
                s += '\\';
                s += 'n';
                break;
            case '\r':
                s += '\\';
                s += 'r';
                break;
            case '\t':
                s += '\\';
                s += 't';
                break;
            default:
                if (!std::iscntrl(c, std::locale::classic())) {
                    s += c;
                    continue;
                }
                s += intToHex(static_cast<unsigned int>(c));
                break;
        }
    }
    return s;
}
// Wrap an indentation in a struct for ostream operator<<
struct indent {
explicit indent(size_t depth) : d(depth) {}
// Number of indentation units to emit; note the size_t argument is stored
// as int.
int d;
};
/// Writes the indentation unit `x.d` times to `os` and returns `os`.
std::ostream &operator<<(std::ostream &os, indent x) {
    static const string spaces(" ");
    for (int remaining = x.d; remaining != 0; --remaining) {
        os << spaces;
    }
    return os;
}
} // anonymous namespace
// Number of characters reserved in the output string for each byte when
// BYTES/FIXED defaults are rendered with intToHex().
const int kByteStringSize = 6;
// Resolves this (writer) primitive against `reader`.
//
// Identical types match. Otherwise numeric promotion is applied: INT may be
// read as LONG, FLOAT or DOUBLE; LONG as FLOAT or DOUBLE; FLOAT as DOUBLE.
// Anything else is delegated to furtherResolution().
SchemaResolution
NodePrimitive::resolve(const Node &reader) const {
    const auto writerType = type();
    const auto readerType = reader.type();

    if (writerType == readerType) {
        return RESOLVE_MATCH;
    }

    const bool fromInt = writerType == AVRO_INT;
    const bool fromLong = writerType == AVRO_LONG;
    const bool fromFloat = writerType == AVRO_FLOAT;

    if (fromInt && readerType == AVRO_LONG) {
        return RESOLVE_PROMOTABLE_TO_LONG;
    }
    if ((fromInt || fromLong) && readerType == AVRO_FLOAT) {
        return RESOLVE_PROMOTABLE_TO_FLOAT;
    }
    if ((fromInt || fromLong || fromFloat) && readerType == AVRO_DOUBLE) {
        return RESOLVE_PROMOTABLE_TO_DOUBLE;
    }
    return furtherResolution(reader);
}
// A record matches a reader record with the same name; every other case is
// delegated to furtherResolution().
SchemaResolution
NodeRecord::resolve(const Node &reader) const {
    if (reader.type() == AVRO_RECORD && name() == reader.name()) {
        return RESOLVE_MATCH;
    }
    return furtherResolution(reader);
}
// Against a reader enum the result depends solely on the names being equal;
// a non-enum reader is delegated to furtherResolution().
SchemaResolution
NodeEnum::resolve(const Node &reader) const {
    if (reader.type() != AVRO_ENUM) {
        return furtherResolution(reader);
    }
    if (name() == reader.name()) {
        return RESOLVE_MATCH;
    }
    return RESOLVE_NO_MATCH;
}
// Against a reader array, resolution is that of the item schemas (leaf 0);
// a non-array reader is delegated to furtherResolution().
SchemaResolution
NodeArray::resolve(const Node &reader) const {
    if (reader.type() != AVRO_ARRAY) {
        return furtherResolution(reader);
    }
    const NodePtr &writerItems = leafAt(0);
    return writerItems->resolve(*reader.leafAt(0));
}
// Against a reader map, resolution is that of the schemas at leaf 1;
// a non-map reader is delegated to furtherResolution().
SchemaResolution
NodeMap::resolve(const Node &reader) const {
    if (reader.type() != AVRO_MAP) {
        return furtherResolution(reader);
    }
    const NodePtr &writerValues = leafAt(1);
    return writerValues->resolve(*reader.leafAt(1));
}
// Resolves a writer union against `reader`.
//
// The writer's selected branch is not known here, so this reports whether a
// match is possible: the branches are tried in order, an exact match wins
// immediately, and otherwise the first result that is not RESOLVE_NO_MATCH
// is kept.
SchemaResolution
NodeUnion::resolve(const Node &reader) const {
    SchemaResolution best = RESOLVE_NO_MATCH;
    for (size_t branch = 0; branch < leaves(); ++branch) {
        const SchemaResolution candidate = leafAt(branch)->resolve(reader);
        if (candidate == RESOLVE_MATCH) {
            return candidate;
        }
        if (best == RESOLVE_NO_MATCH) {
            best = candidate;
        }
    }
    return best;
}
// Against a reader FIXED, both the size and the name must be equal;
// a non-FIXED reader is delegated to furtherResolution().
SchemaResolution
NodeFixed::resolve(const Node &reader) const {
    if (reader.type() != AVRO_FIXED) {
        return furtherResolution(reader);
    }
    const bool same = (reader.fixedSize() == fixedSize()) && (reader.name() == name());
    if (same) {
        return RESOLVE_MATCH;
    }
    return RESOLVE_NO_MATCH;
}
// Forwards resolution to the node at leaf 0.
SchemaResolution
NodeSymbolic::resolve(const Node &reader) const {
    return leafAt(0)->resolve(reader);
}
void NodePrimitive::printJson(std::ostream &os, size_t depth) const {
bool hasLogicalType = logicalType().type() != LogicalType::NONE;
if (hasLogicalType) {
os << "{\n"
<< indent(depth) << "\"type\": ";
}
os << '\"' << type() << '\"';
if (hasLogicalType) {
os << ",\n"
<< indent(depth);
logicalType().printJson(os);
os << "\n}";
}
if (!getDoc().empty()) {
os << ",\n"
<< indent(depth) << R"("doc": ")"
<< escape(getDoc()) << "\"";
}
}
// Writes the quoted name attribute to `os`, followed by a "doc" entry when
// the doc string is non-empty.
void NodeSymbolic::printJson(std::ostream &os, size_t depth) const {
    os << '\"' << nameAttribute_.get() << '\"';
    if (getDoc().empty()) {
        return;
    }
    os << ",\n"
       << indent(depth) << R"("doc": ")"
       << escape(getDoc()) << "\"";
}
// Writes the "namespace" (only when non-empty) and "name" JSON entries for
// `n`, each indented by `depth` and terminated by ",\n".
static void printName(std::ostream &os, const Name &n, size_t depth) {
    const bool hasNamespace = !n.ns().empty();
    if (hasNamespace) {
        os << indent(depth) << R"("namespace": ")" << n.ns() << "\",\n";
    }
    os << indent(depth) << R"("name": ")" << n.simpleName() << "\",\n";
}
// Writes this record schema as a JSON object to `os`: type, name (with
// namespace when present), optional doc, and the "fields" array.
//
// `depth` is the indentation level of the enclosing context; it is
// incremented and decremented in place as nested levels are entered and left.
void NodeRecord::printJson(std::ostream &os, size_t depth) const {
os << "{\n";
os << indent(++depth) << "\"type\": \"record\",\n";
printName(os, nameAttribute_.get(), depth);
if (!getDoc().empty()) {
os << indent(depth) << R"("doc": ")"
<< escape(getDoc()) << "\",\n";
}
os << indent(depth) << "\"fields\": [";
size_t fields = leafAttributes_.size();
++depth;
// Serialize "default" field:
// Defaults are either absent for the whole record or present per field.
assert(defaultValues.empty() || (defaultValues.size() == fields));
for (size_t i = 0; i < fields; ++i) {
if (i > 0) {
os << ',';
}
os << '\n'
<< indent(depth) << "{\n";
os << indent(++depth) << R"("name": ")" << leafNameAttributes_.get(i) << "\",\n";
os << indent(depth) << "\"type\": ";
leafAttributes_.get(i)->printJson(os, depth);
if (!defaultValues.empty()) {
// A non-union default of type AVRO_NULL is treated as "no default".
if (!defaultValues[i].isUnion() && defaultValues[i].type() == AVRO_NULL) {
// No "default" field.
} else {
os << ",\n"
<< indent(depth) << "\"default\": ";
leafAttributes_.get(i)->printDefaultToJson(defaultValues[i], os,
depth);
}
}
os << '\n';
os << indent(--depth) << '}';
}
os << '\n'
<< indent(--depth) << "]\n";
os << indent(--depth) << '}';
}
void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
size_t depth) const {
assert(isPrimitive(g.type()));
switch (g.type()) {
case AVRO_NULL:
os << "null";
break;
case AVRO_BOOL:
os << (g.value<bool>() ? "true" : "false");
break;
case AVRO_INT:
os << g.value<int32_t>();
break;
case AVRO_LONG:
os << g.value<int64_t>();
break;
case AVRO_FLOAT:
os << g.value<float>();
break;
case AVRO_DOUBLE:
os << g.value<double>();
break;
case AVRO_STRING:
os << "\"" << escape(g.value<string>()) << "\"";
break;
case AVRO_BYTES: {
// Convert to a string:
const auto &vg = g.value<std::vector<uint8_t>>();
string s;
s.resize(vg.size() * kByteStringSize);
for (unsigned int i = 0; i < vg.size(); i++) {
string hex_string = intToHex(static_cast<int>(vg[i]));
s.replace(i * kByteStringSize, kByteStringSize, hex_string);
}
os << "\"" << s << "\"";
} break;
default: break;
}
}
// Writes an enum default as the quoted symbol held by the datum.
// `depth` is unused.
void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os,
                                  size_t depth) const {
    assert(g.type() == AVRO_ENUM);
    const auto &enumValue = g.value<GenericEnum>();
    os << "\"" << enumValue.symbol() << "\"";
}
void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os,
size_t depth) const {
assert(g.type() == AVRO_FIXED);
// ex: "\uOOff"
// Convert to a string
const std::vector<uint8_t> &vg = g.value<GenericFixed>().value();
string s;
s.resize(vg.size() * kByteStringSize);
for (unsigned int i = 0; i < vg.size(); i++) {
string hex_string = intToHex(static_cast<int>(vg[i]));
s.replace(i * kByteStringSize, kByteStringSize, hex_string);
}
os << "\"" << s << "\"";
}
void NodeUnion::printDefaultToJson(const GenericDatum &g, std::ostream &os,
size_t depth) const {
leafAt(0)->printDefaultToJson(g, os, depth);
}
// Writes an array default (ex: "default": [1]).
// An empty array is printed as "[]"; otherwise every element is printed on its
// own line, one level deeper than `depth`, through the item node (leaf 0).
void NodeArray::printDefaultToJson(const GenericDatum &g, std::ostream &os,
                                   size_t depth) const {
    assert(g.type() == AVRO_ARRAY);

    const auto &items = g.value<GenericArray>().value();
    if (items.empty()) {
        os << "[]";
        return;
    }

    const size_t itemDepth = depth + 1;
    os << "[\n";
    for (unsigned int pos = 0; pos < items.size(); pos++) {
        if (pos != 0) {
            os << ",\n";
        }
        os << indent(itemDepth);
        // Recurse into the item schema for each element.
        leafAt(0)->printDefaultToJson(items[pos], os, itemDepth);
    }
    os << "\n"
       << indent(depth) << "]";
}
// Writes a default for a symbolic node by forwarding to the node returned by
// getNode().
void NodeSymbolic::printDefaultToJson(const GenericDatum &g, std::ostream &os,
                                      size_t depth) const {
    const auto target = getNode();
    target->printDefaultToJson(g, os, depth);
}
// Writes a record default as a JSON object.
// A record without fields is printed as "{}"; otherwise each field is printed
// as "name": value on its own line, one level deeper than `depth`. Names come
// from this node, values are printed by the matching leaf node.
void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os,
                                    size_t depth) const {
    assert(g.type() == AVRO_RECORD);

    const auto &record = g.value<GenericRecord>();
    if (record.fieldCount() == 0) {
        os << "{}";
        return;
    }

    const size_t fieldDepth = depth + 1;
    os << "{\n";
    for (size_t pos = 0; pos < record.fieldCount(); pos++) {
        if (pos != 0) {
            os << ",\n";
        }
        os << indent(fieldDepth) << "\"";
        assert(pos < leaves());
        os << leafNameAttributes_.get(pos);
        os << "\": ";

        // The field name lives in 'this'; the value is printed by the child
        // node that describes the field's type.
        leafAt(pos)->printDefaultToJson(record.fieldAt(pos), os, fieldDepth);
    }
    os << "\n"
       << indent(depth) << "}";
}
// Builds a record node from its name, field nodes, field names and per-field
// default values, then registers every field name in nameIndex_.
// Throws Exception when a field name occurs more than once.
NodeRecord::NodeRecord(const HasName &name,
                       const MultiLeaves &fields,
                       const LeafNames &fieldsNames,
                       std::vector<GenericDatum> dv)
    : NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize()),
      defaultValues(std::move(dv)) {
    const size_t nameCount = leafNameAttributes_.size();
    for (size_t pos = 0; pos < nameCount; ++pos) {
        const bool added = nameIndex_.add(leafNameAttributes_.get(pos), pos);
        if (added) {
            continue;
        }
        throw Exception(boost::format(
                            "Cannot add duplicate field: %1%")
                        % leafNameAttributes_.get(pos));
    }
}
// Writes a map default as a JSON object (ex: "default": {"a": 1}).
//
// An empty map is printed as "{}". Otherwise each entry is printed as
// "key": value on its own line, one level deeper than `depth`.
//
// Every value is printed through the map's value schema, which is leaf 1:
// the NodeMap constructor installs the string key node as leaf 0 and
// NodeMap::printJson reads the "values" schema from index 1. The previous
// code used leafAt(i) with the *entry* index, which routed entry 0 through
// the key node and indexed past the two leaves for any map with three or
// more entries.
//
// g:     datum holding the map default; must be of type AVRO_MAP.
// os:    destination stream.
// depth: indentation level of the enclosing context.
void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os,
                                 size_t depth) const {
    assert(g.type() == AVRO_MAP);

    const auto &entries = g.value<GenericMap>().value();
    if (entries.empty()) {
        os << "{}";
        return;
    }

    const size_t entryDepth = depth + 1;
    os << "{\n";
    for (size_t i = 0; i < entries.size(); i++) {
        if (i > 0) {
            os << ",\n";
        }
        os << indent(entryDepth) << "\"" << entries[i].first
           << "\": ";
        // Leaf 1 is the value schema shared by all entries.
        leafAt(1)->printDefaultToJson(entries[i].second, os, entryDepth);
    }
    os << "\n"
       << indent(depth) << "}";
}
// Writes the JSON schema of an enum: type, optional doc, name/namespace and
// the "symbols" array with one quoted symbol per line.
//
// The symbol count is kept as size_t (it was narrowed to int before), which
// matches how NodeRecord::printJson handles its field count and avoids an
// implicit unsigned-to-signed conversion.
//
// os:    destination stream.
// depth: indentation level of the enclosing context.
void NodeEnum::printJson(std::ostream &os, size_t depth) const {
    const size_t memberDepth = depth + 1; // attributes of the enum object
    const size_t symbolDepth = depth + 2; // entries of the "symbols" array

    os << "{\n";
    os << indent(memberDepth) << "\"type\": \"enum\",\n";
    if (!getDoc().empty()) {
        os << indent(memberDepth) << R"("doc": ")"
           << escape(getDoc()) << "\",\n";
    }
    printName(os, nameAttribute_.get(), memberDepth);
    os << indent(memberDepth) << "\"symbols\": [\n";

    const size_t names = leafNameAttributes_.size();
    for (size_t i = 0; i < names; ++i) {
        if (i > 0) {
            os << ",\n";
        }
        os << indent(symbolDepth) << '\"' << leafNameAttributes_.get(i) << '\"';
    }
    os << '\n';
    os << indent(memberDepth) << "]\n";
    os << indent(depth) << '}';
}
// Writes the JSON schema of an array: type, optional doc and the "items"
// schema, which is printed by the item node one level deeper than `depth`.
void NodeArray::printJson(std::ostream &os, size_t depth) const {
    const size_t inner = depth + 1;

    os << "{\n";
    os << indent(inner) << "\"type\": \"array\",\n";

    const auto &doc = getDoc();
    if (!doc.empty()) {
        os << indent(inner) << R"("doc": ")"
           << escape(doc) << "\",\n";
    }

    os << indent(inner) << "\"items\": ";
    leafAttributes_.get()->printJson(os, inner);
    os << '\n';
    os << indent(depth) << '}';
}
// Writes the JSON schema of a map: type, optional doc and the "values"
// schema. The value schema is the leaf at index 1 and is printed one level
// deeper than `depth`.
void NodeMap::printJson(std::ostream &os, size_t depth) const {
    const size_t inner = depth + 1;

    os << "{\n";
    os << indent(inner) << "\"type\": \"map\",\n";

    const auto &doc = getDoc();
    if (!doc.empty()) {
        os << indent(inner) << R"("doc": ")"
           << escape(doc) << "\",\n";
    }

    os << indent(inner) << "\"values\": ";
    leafAttributes_.get(1)->printJson(os, inner);
    os << '\n';
    os << indent(depth) << '}';
}
// Creates an empty map node and installs a string primitive as its first
// leaf, so the key schema always occupies leaf 0.
NodeMap::NodeMap() : NodeImplMap(AVRO_MAP) {
    NodePtr keyNode(new NodePrimitive(AVRO_STRING));
    doAddLeaf(keyNode);
}
// Writes the JSON schema of a union as an array with one branch per line,
// each branch printed by its own node one level deeper than `depth`.
//
// The branch count is kept as size_t (it was narrowed to int before), which
// matches how NodeRecord::printJson handles its field count and avoids an
// implicit unsigned-to-signed conversion.
//
// os:    destination stream.
// depth: indentation level of the enclosing context.
void NodeUnion::printJson(std::ostream &os, size_t depth) const {
    const size_t branchDepth = depth + 1;

    os << "[\n";
    const size_t fields = leafAttributes_.size();
    for (size_t i = 0; i < fields; ++i) {
        if (i > 0) {
            os << ",\n";
        }
        os << indent(branchDepth);
        leafAttributes_.get(i)->printJson(os, branchDepth);
    }
    os << '\n';
    os << indent(depth) << ']';
}
void NodeFixed::printJson(std::ostream &os, size_t depth) const {
os << "{\n";
os << indent(++depth) << "\"type\": \"fixed\",\n";
if (!getDoc().empty()) {
os << indent(depth) << R"("doc": ")"
<< escape(getDoc()) << "\",\n";
}
printName(os, nameAttribute_.get(), depth);
os << indent(depth) << "\"size\": " << sizeAttribute_.get();
if (logicalType().type() != LogicalType::NONE) {
os << ",\n"
<< indent(depth);
logicalType().printJson(os);
}
os << "\n"
<< indent(--depth) << '}';
}
} // namespace avro

View File

@ -1,744 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Resolver.hh"
#include "AvroTraits.hh"
#include "Layout.hh"
#include "NodeImpl.hh"
#include "Reader.hh"
#include "ValidSchema.hh"
#include <memory>
namespace avro {
using std::unique_ptr;
class ResolverFactory;
typedef std::shared_ptr<Resolver> ResolverPtr;
typedef std::vector<std::unique_ptr<Resolver>> ResolverPtrVector;
// Debug tracing switch: uncomment the define below to make DEBUG_OUT write
// its argument to std::cout. Otherwise DEBUG_OUT streams into a NoOp sink
// that discards everything.
// #define DEBUG_VERBOSE
#ifdef DEBUG_VERBOSE
#define DEBUG_OUT(str) std::cout << str << '\n'
#else
// Sink type whose operator<< accepts any value and ignores it.
class NoOp {};
// Swallows `T` and returns the sink so that chained << expressions compile.
template<typename T>
NoOp &operator<<(NoOp &noOp, const T &) {
return noOp;
}
// The single sink instance that DEBUG_OUT streams into.
NoOp noop;
#define DEBUG_OUT(str) noop << str
#endif
// Resolver that reads one value of type T from the reader and discards it.
// The destination `address` is never written.
template<typename T>
class PrimitiveSkipper : public Resolver {
public:
    PrimitiveSkipper() : Resolver() {}

    void parse(Reader &reader, uint8_t *address) const final {
        T discarded;
        reader.readValue(discarded);
        DEBUG_OUT("Skipping " << discarded);
    }
};
// Resolver that reads one value of type T from the reader directly into the
// destination object, at the byte offset taken from the primitive layout.
template<typename T>
class PrimitiveParser : public Resolver {
public:
    explicit PrimitiveParser(const PrimitiveLayout &offset)
        : Resolver(), offset_(offset.offset()) {}

    void parse(Reader &reader, uint8_t *address) const final {
        uint8_t *target = address + offset_;
        T *location = reinterpret_cast<T *>(target);
        reader.readValue(*location);
        DEBUG_OUT("Reading " << *location);
    }

private:
    size_t offset_; // byte offset of the destination inside the object
};
// Resolver that reads a value of the writer type WT and stores it, converted
// with static_cast, as the reader type RT at the layout's byte offset.
// The work is selected by tag dispatch on is_promotable<WT>: for
// non-promotable writer types parse() does nothing at all.
template<typename WT, typename RT>
class PrimitivePromoter : public Resolver {
public:
    explicit PrimitivePromoter(const PrimitiveLayout &offset)
        : Resolver(), offset_(offset.offset()) {}

    void parse(Reader &reader, uint8_t *address) const final {
        parseIt(reader, address, is_promotable<WT>());
    }

private:
    // Promotable writer type: read as WT, store as RT.
    void parseIt(Reader &reader, uint8_t *address, const std::true_type &) const {
        WT written;
        reader.readValue(written);
        RT *location = reinterpret_cast<RT *>(address + offset_);
        *location = static_cast<RT>(written);
        DEBUG_OUT("Promoting " << written);
    }

    // Non-promotable writer type: intentionally a no-op.
    void parseIt(Reader &reader, uint8_t *, const std::false_type &) const {}

    size_t offset_; // byte offset of the destination inside the object
};
// Specialization for byte sequences: reads the bytes with readBytes() into a
// temporary vector and discards them.
template<>
class PrimitiveSkipper<std::vector<uint8_t>> : public Resolver {
public:
    PrimitiveSkipper() : Resolver() {}

    void parse(Reader &reader, uint8_t *address) const final {
        std::vector<uint8_t> discarded;
        reader.readBytes(discarded);
        DEBUG_OUT("Skipping bytes");
    }
};
// Specialization for byte sequences: reads the bytes with readBytes() into
// the std::vector<uint8_t> located at the layout's byte offset.
template<>
class PrimitiveParser<std::vector<uint8_t>> : public Resolver {
public:
    explicit PrimitiveParser(const PrimitiveLayout &offset)
        : Resolver(), offset_(offset.offset()) {}

    void parse(Reader &reader, uint8_t *address) const final {
        uint8_t *target = address + offset_;
        auto *location = reinterpret_cast<std::vector<uint8_t> *>(target);
        reader.readBytes(*location);
        DEBUG_OUT("Reading bytes");
    }

private:
    size_t offset_; // byte offset of the destination vector inside the object
};
// Resolver that consumes a whole record by running one sub-resolver per
// writer field, in order. The sub-resolvers are built in the constructor.
class RecordSkipper : public Resolver {
public:
    RecordSkipper(ResolverFactory &factory, const NodePtr &writer);

    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Skipping record");
        reader.readRecord();
        for (const auto &fieldResolver : resolvers_) {
            fieldResolver->parse(reader, address);
        }
    }

protected:
    ResolverPtrVector resolvers_; // one resolver per writer field
};
// Resolver that reads a record by running one sub-resolver per writer field,
// in order, all against the same destination address. The sub-resolvers are
// built in the constructor.
class RecordParser : public Resolver {
public:
    RecordParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets);

    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Reading record");
        reader.readRecord();
        for (const auto &fieldResolver : resolvers_) {
            fieldResolver->parse(reader, address);
        }
    }

protected:
    ResolverPtrVector resolvers_; // one resolver per writer field
};
// Resolver that consumes a whole map without storing anything.
//
// Blocks are read until a block size of 0 is returned; for every entry the
// key is read into a scratch string and the value is consumed by the value
// resolver built in the constructor.
class MapSkipper : public Resolver {
public:
    MapSkipper(ResolverFactory &factory, const NodePtr &writer);

    // reader:  source of the encoded data.
    // address: forwarded unchanged to the value resolver.
    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Skipping map");
        std::string key;
        int64_t size;
        do {
            size = reader.readMapBlockSize();
            // The counter has the same type as the block size: a plain
            // `auto i = 0` is an int and would overflow for blocks larger
            // than INT_MAX.
            for (int64_t i = 0; i < size; ++i) {
                reader.readValue(key);
                resolver_->parse(reader, address);
            }
        } while (size != 0);
    }

protected:
    ResolverPtr resolver_; // consumes one map value
};
// Resolver that reads a map into the destination object.
//
// The destination map lives at `address + offset_`; a function pointer of
// type GenericMapSetter is read from `address + setFuncOffset_`. For every
// entry the setter is called with the map address and the key, and the value
// resolver then parses the value into the location the setter returned.
// Blocks are read until a block size of 0 is returned.
class MapParser : public Resolver {
public:
    // Creates (or finds) the entry for `key` in `map` and returns where the
    // value has to be written.
    typedef uint8_t *(*GenericMapSetter)(uint8_t *map, const std::string &key);

    MapParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets);

    // reader:  source of the encoded data.
    // address: base address of the object that contains the map.
    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Reading map");
        uint8_t *mapAddress = address + offset_;
        std::string key;
        auto *setter = reinterpret_cast<GenericMapSetter *>(address + setFuncOffset_);
        int64_t size;
        do {
            size = reader.readMapBlockSize();
            // The counter has the same type as the block size: a plain
            // `auto i = 0` is an int and would overflow for blocks larger
            // than INT_MAX.
            for (int64_t i = 0; i < size; ++i) {
                reader.readValue(key);
                // create a new map entry and get the address
                uint8_t *location = (*setter)(mapAddress, key);
                resolver_->parse(reader, location);
            }
        } while (size != 0);
    }

protected:
    ResolverPtr resolver_; // parses one map value
    size_t offset_;        // byte offset of the map inside the object
    size_t setFuncOffset_; // byte offset of the stored GenericMapSetter
};
// Resolver that consumes a whole array without storing anything.
//
// Blocks are read until a block size of 0 is returned; every element is
// consumed by the item resolver built in the constructor.
class ArraySkipper : public Resolver {
public:
    ArraySkipper(ResolverFactory &factory, const NodePtr &writer);

    // reader:  source of the encoded data.
    // address: forwarded unchanged to the item resolver.
    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Skipping array");
        int64_t size;
        do {
            size = reader.readArrayBlockSize();
            // The counter has the same type as the block size: a plain
            // `auto i = 0` is an int and would overflow for blocks larger
            // than INT_MAX.
            for (int64_t i = 0; i < size; ++i) {
                resolver_->parse(reader, address);
            }
        } while (size != 0);
    }

protected:
    ResolverPtr resolver_; // consumes one array element
};
// Appends a new element to `array` and returns where the element has to be
// written.
typedef uint8_t *(*GenericArraySetter)(uint8_t *array);

// Resolver that reads an array into the destination object.
//
// The destination array lives at `address + offset_`; a GenericArraySetter
// function pointer is read from `address + setFuncOffset_`. For every element
// the setter is called with the array address and the item resolver then
// parses the element into the location the setter returned. Blocks are read
// until a block size of 0 is returned.
class ArrayParser : public Resolver {
public:
    ArrayParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets);

    // reader:  source of the encoded data.
    // address: base address of the object that contains the array.
    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Reading array");
        uint8_t *arrayAddress = address + offset_;
        auto *setter = reinterpret_cast<GenericArraySetter *>(address + setFuncOffset_);
        int64_t size;
        do {
            size = reader.readArrayBlockSize();
            // The counter has the same type as the block size: a plain
            // `auto i = 0` is an int and would overflow for blocks larger
            // than INT_MAX.
            for (int64_t i = 0; i < size; ++i) {
                // create a new array entry and get the address
                uint8_t *location = (*setter)(arrayAddress);
                resolver_->parse(reader, location);
            }
        } while (size != 0);
    }

protected:
    // Leaves the resolver unset and both offsets at 0.
    ArrayParser() : Resolver(), offset_(0), setFuncOffset_(0) {}

    ResolverPtr resolver_; // parses one array element
    size_t offset_;        // byte offset of the array inside the object
    size_t setFuncOffset_; // byte offset of the stored GenericArraySetter
};
class EnumSkipper : public Resolver {
public:
EnumSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {}
void parse(Reader &reader, uint8_t *address) const final {
int64_t val = reader.readEnum();
DEBUG_OUT("Skipping enum" << val);
}
};
// Resolver that reads an enum written with the writer schema and stores the
// matching reader-schema ordinal in the destination object.
//
// The constructor builds `mapping_`: for each writer symbol, the index of the
// same-named reader symbol, or `readerSize_` when the reader has no such
// symbol. parse() leaves the destination untouched for unmapped symbols.
class EnumParser : public Resolver {
public:
    // Stand-in enum type used to write the ordinal into the destination.
    enum EnumRepresentation {
        VAL
    };

    // factory: unused.
    // writer:  enum node of the writer schema (source of the symbol order).
    // reader:  enum node of the reader schema (target of the mapping).
    // offsets: layout whose first entry gives the destination byte offset.
    EnumParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(),
                                                                                                                        offset_(offsets.at(0).offset()),
                                                                                                                        readerSize_(reader->names()) {
        const size_t writerSize = writer->names();
        mapping_.reserve(writerSize);
        for (size_t i = 0; i < writerSize; ++i) {
            const std::string &name = writer->nameAt(i);
            // Stays at readerSize_ ("no match") unless nameIndex() finds it.
            size_t readerIndex = readerSize_;
            reader->nameIndex(name, readerIndex);
            mapping_.push_back(readerIndex);
        }
    }

    // Reads the writer ordinal and stores the mapped reader ordinal.
    // Throws std::out_of_range when the encoded ordinal is not a valid writer
    // symbol index; previously this was only caught by an assert, so release
    // builds read past the end of `mapping_` on malformed input.
    void parse(Reader &reader, uint8_t *address) const final {
        const auto val = static_cast<size_t>(reader.readEnum());
        assert(val < mapping_.size());
        const size_t readerIndex = mapping_.at(val);
        if (readerIndex < readerSize_) {
            auto *location = reinterpret_cast<EnumRepresentation *>(address + offset_);
            *location = static_cast<EnumRepresentation>(readerIndex);
            DEBUG_OUT("Setting enum" << *location);
        }
    }

protected:
    size_t offset_;               // byte offset of the destination enum
    size_t readerSize_;           // number of reader symbols; doubles as "no match"
    std::vector<size_t> mapping_; // writer ordinal -> reader ordinal
};
// Resolver that consumes a union value without storing anything: it reads the
// branch index and lets the skipper for that writer branch consume the value.
class UnionSkipper : public Resolver {
public:
    UnionSkipper(ResolverFactory &factory, const NodePtr &writer);

    // reader:  source of the encoded data.
    // address: forwarded unchanged to the branch resolver.
    // Throws std::out_of_range when the encoded branch index does not name a
    // writer branch; the index comes from the input stream and was used
    // unchecked before.
    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Skipping union");
        auto choice = static_cast<size_t>(reader.readUnion());
        resolvers_.at(choice)->parse(reader, address);
    }

protected:
    ResolverPtrVector resolvers_; // one skipper per writer branch
};
// Resolver that reads a union written with the writer schema into a reader
// union in the destination object.
//
// parse() reads the writer branch index, stores the mapped reader branch at
// `address + choiceOffset_`, calls the GenericUnionSetter stored at
// `address + setFuncOffset_` with the union value address and the reader
// branch, and lets the resolver for the writer branch parse into the location
// the setter returned.
class UnionParser : public Resolver {
public:
    // Selects branch `choice` in the union at the given address and returns
    // where the branch value has to be written.
    typedef uint8_t *(*GenericUnionSetter)(uint8_t *, int64_t);

    UnionParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets);

    // reader:  source of the encoded data.
    // address: base address of the object that contains the union.
    // Throws std::out_of_range when the encoded branch index does not name a
    // writer branch; the index comes from the input stream and was used
    // unchecked before. The check happens before anything is written.
    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Reading union");
        auto writerChoice = static_cast<size_t>(reader.readUnion());
        const int64_t mappedChoice = choiceMapping_.at(writerChoice);
        const auto &branchResolver = resolvers_.at(writerChoice);

        auto *readerChoice = reinterpret_cast<int64_t *>(address + choiceOffset_);
        *readerChoice = mappedChoice;
        auto *setter = reinterpret_cast<GenericUnionSetter *>(address + setFuncOffset_);
        auto *value = reinterpret_cast<uint8_t *>(address + offset_);
        uint8_t *location = (*setter)(value, *readerChoice);
        branchResolver->parse(reader, location);
    }

protected:
    ResolverPtrVector resolvers_;        // one resolver per writer branch
    std::vector<int64_t> choiceMapping_; // writer branch -> reader branch
    size_t offset_;                      // byte offset of the union value
    size_t choiceOffset_;                // byte offset of the stored branch index
    size_t setFuncOffset_;               // byte offset of the stored GenericUnionSetter
};
// Resolver for a writer union read as a non-union reader type: it reads the
// writer branch index and lets the resolver built for that branch parse the
// value at the unchanged destination address.
class UnionToNonUnionParser : public Resolver {
public:
    typedef uint8_t *(*GenericUnionSetter)(uint8_t *, int64_t);

    UnionToNonUnionParser(ResolverFactory &factory,
                          const NodePtr &writer,
                          const NodePtr &reader,
                          const Layout &offsets);

    // reader:  source of the encoded data.
    // address: destination, forwarded unchanged to the branch resolver.
    // Throws std::out_of_range when the encoded branch index does not name a
    // writer branch; the index comes from the input stream and was used
    // unchecked before.
    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Reading union to non-union");
        auto choice = static_cast<size_t>(reader.readUnion());
        resolvers_.at(choice)->parse(reader, address);
    }

protected:
    ResolverPtrVector resolvers_; // one resolver per writer branch
};
// Resolver for a non-union writer type read into a reader union.
//
// The reader branch (`choice_`) is fixed in the constructor. parse() stores
// it at `address + choiceOffset_`, calls the GenericUnionSetter stored at
// `address + setFuncOffset_` with the union value address and the branch, and
// parses the value into the location the setter returned.
class NonUnionToUnionParser : public Resolver {
public:
    typedef uint8_t *(*GenericUnionSetter)(uint8_t *, int64_t);

    NonUnionToUnionParser(ResolverFactory &factory,
                          const NodePtr &writer,
                          const NodePtr &reader,
                          const CompoundLayout &offsets);

    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Reading non-union to union");

        // Record which reader branch is active.
        auto *storedChoice = reinterpret_cast<int64_t *>(address + choiceOffset_);
        *storedChoice = choice_;

        // Let the setter activate the branch and tell us where to write.
        auto *setter = reinterpret_cast<GenericUnionSetter *>(address + setFuncOffset_);
        auto *unionValue = reinterpret_cast<uint8_t *>(address + offset_);
        uint8_t *target = (*setter)(unionValue, choice_);

        resolver_->parse(reader, target);
    }

protected:
    ResolverPtr resolver_; // parses the writer value into the chosen branch
    size_t choice_;        // reader branch selected at construction
    size_t offset_;        // byte offset of the union value
    size_t choiceOffset_;  // byte offset of the stored branch index
    size_t setFuncOffset_; // byte offset of the stored GenericUnionSetter
};
// Resolver that consumes a fixed value: it reads `size_` bytes (the writer's
// fixed size) into a temporary buffer and discards them.
class FixedSkipper : public Resolver {
public:
    FixedSkipper(ResolverFactory &factory, const NodePtr &writer)
        : Resolver(), size_(writer->fixedSize()) {}

    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Skipping fixed");
        std::unique_ptr<uint8_t[]> scratch(new uint8_t[size_]);
        reader.readFixed(scratch.get(), size_);
    }

protected:
    int size_; // number of bytes of the writer's fixed type
};
// Resolver that reads a fixed value: `size_` bytes (the writer's fixed size)
// are read straight into the destination at the byte offset given by the
// first layout entry.
class FixedParser : public Resolver {
public:
    FixedParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets)
        : Resolver(),
          size_(writer->fixedSize()),
          offset_(offsets.at(0).offset()) {}

    void parse(Reader &reader, uint8_t *address) const final {
        DEBUG_OUT("Reading fixed");
        uint8_t *target = address + offset_;
        reader.readFixed(target, size_);
    }

protected:
    int size_;      // number of bytes of the writer's fixed type
    size_t offset_; // byte offset of the destination buffer
};
// Builds the tree of Resolver objects that turns data written with one schema
// into the in-memory layout of another. construct() creates a resolver for a
// writer/reader node pair; skipper() creates one that only consumes the
// writer's data. Both dispatch through a table of member-function pointers
// indexed by the writer node's type.
class ResolverFactory : private boost::noncopyable {
// Returns a resolver that reads a primitive of type T and discards it.
template<typename T>
unique_ptr<Resolver>
constructPrimitiveSkipper(const NodePtr &writer) {
return unique_ptr<Resolver>(new PrimitiveSkipper<T>());
}
// Returns the resolver for a primitive writer type T, chosen from the result
// of writer->resolve(*reader):
//   no match           -> skipper
//   reader is a union  -> NonUnionToUnionParser
//   exact match        -> PrimitiveParser<T>
//   promotable         -> PrimitivePromoter<T, long/float/double>
// `offset` is cast (unchecked static_cast) to the layout kind each case needs.
template<typename T>
unique_ptr<Resolver>
constructPrimitive(const NodePtr &writer, const NodePtr &reader, const Layout &offset) {
unique_ptr<Resolver> instruction;
SchemaResolution match = writer->resolve(*reader);
if (match == RESOLVE_NO_MATCH) {
instruction = unique_ptr<Resolver>(new PrimitiveSkipper<T>());
} else if (reader->type() == AVRO_UNION) {
const auto &compoundLayout = static_cast<const CompoundLayout &>(offset);
instruction = unique_ptr<Resolver>(new NonUnionToUnionParser(*this, writer, reader, compoundLayout));
} else if (match == RESOLVE_MATCH) {
const auto &primitiveLayout = static_cast<const PrimitiveLayout &>(offset);
instruction = unique_ptr<Resolver>(new PrimitiveParser<T>(primitiveLayout));
} else if (match == RESOLVE_PROMOTABLE_TO_LONG) {
const auto &primitiveLayout = static_cast<const PrimitiveLayout &>(offset);
instruction = unique_ptr<Resolver>(new PrimitivePromoter<T, int64_t>(primitiveLayout));
} else if (match == RESOLVE_PROMOTABLE_TO_FLOAT) {
const auto &primitiveLayout = static_cast<const PrimitiveLayout &>(offset);
instruction = unique_ptr<Resolver>(new PrimitivePromoter<T, float>(primitiveLayout));
} else if (match == RESOLVE_PROMOTABLE_TO_DOUBLE) {
const auto &primitiveLayout = static_cast<const PrimitiveLayout &>(offset);
instruction = unique_ptr<Resolver>(new PrimitivePromoter<T, double>(primitiveLayout));
} else {
// Any other resolution result is unexpected here; with assertions
// disabled an empty pointer is returned.
assert(0);
}
return instruction;
}
// Returns a skipper of the given compound Skipper type for `writer`.
template<typename Skipper>
unique_ptr<Resolver>
constructCompoundSkipper(const NodePtr &writer) {
return unique_ptr<Resolver>(new Skipper(*this, writer));
}
// Returns the resolver for a compound writer type, chosen from the result of
// writer->resolve(*reader) and from which side is a union:
//   no match                        -> Skipper
//   non-union writer, union reader  -> NonUnionToUnionParser
//   union writer, non-union reader  -> UnionToNonUnionParser
//   otherwise                       -> Parser
// The dynamic_cast to CompoundLayout throws std::bad_cast when `offset` is
// not a CompoundLayout.
template<typename Parser, typename Skipper>
unique_ptr<Resolver>
constructCompound(const NodePtr &writer, const NodePtr &reader, const Layout &offset) {
unique_ptr<Resolver> instruction;
avro::SchemaResolution match = writer->resolve(*reader);
if (match == RESOLVE_NO_MATCH) {
instruction = unique_ptr<Resolver>(new Skipper(*this, writer));
} else if (writer->type() != AVRO_UNION && reader->type() == AVRO_UNION) {
const auto &compoundLayout = dynamic_cast<const CompoundLayout &>(offset);
instruction = unique_ptr<Resolver>(new NonUnionToUnionParser(*this, writer, reader, compoundLayout));
} else if (writer->type() == AVRO_UNION && reader->type() != AVRO_UNION) {
instruction = unique_ptr<Resolver>(new UnionToNonUnionParser(*this, writer, reader, offset));
} else {
const auto &compoundLayout = dynamic_cast<const CompoundLayout &>(offset);
instruction = unique_ptr<Resolver>(new Parser(*this, writer, reader, compoundLayout));
}
return instruction;
}
public:
// Returns a resolver that reads data written as `writer` into the layout
// `offset` of a `reader` value. Symbolic nodes on either side are replaced by
// resolveSymbol() first.
unique_ptr<Resolver>
construct(const NodePtr &writer, const NodePtr &reader, const Layout &offset) {
typedef unique_ptr<Resolver> (ResolverFactory::*BuilderFunc)(const NodePtr &writer, const NodePtr &reader, const Layout &offset);
NodePtr currentWriter = (writer->type() == AVRO_SYMBOLIC) ? resolveSymbol(writer) : writer;
NodePtr currentReader = (reader->type() == AVRO_SYMBOLIC) ? resolveSymbol(reader) : reader;
// One builder per type, indexed by the writer's type() value; the
// static_assert below ties the table length to AVRO_NUM_TYPES.
// NOTE(review): the entry order presumably mirrors the Type enum - confirm.
static const BuilderFunc funcs[] = {
&ResolverFactory::constructPrimitive<std::string>,
&ResolverFactory::constructPrimitive<std::vector<uint8_t>>,
&ResolverFactory::constructPrimitive<int32_t>,
&ResolverFactory::constructPrimitive<int64_t>,
&ResolverFactory::constructPrimitive<float>,
&ResolverFactory::constructPrimitive<double>,
&ResolverFactory::constructPrimitive<bool>,
&ResolverFactory::constructPrimitive<Null>,
&ResolverFactory::constructCompound<RecordParser, RecordSkipper>,
&ResolverFactory::constructCompound<EnumParser, EnumSkipper>,
&ResolverFactory::constructCompound<ArrayParser, ArraySkipper>,
&ResolverFactory::constructCompound<MapParser, MapSkipper>,
&ResolverFactory::constructCompound<UnionParser, UnionSkipper>,
&ResolverFactory::constructCompound<FixedParser, FixedSkipper>};
static_assert((sizeof(funcs) / sizeof(BuilderFunc)) == (AVRO_NUM_TYPES),
"Invalid number of builder functions");
BuilderFunc func = funcs[currentWriter->type()];
assert(func);
return ((this)->*(func))(currentWriter, currentReader, offset);
}
// Returns a resolver that consumes data written as `writer` without storing
// it. A symbolic writer is replaced by its leaf 0 (construct() uses
// resolveSymbol() for the same purpose).
unique_ptr<Resolver>
skipper(const NodePtr &writer) {
typedef unique_ptr<Resolver> (ResolverFactory::*BuilderFunc)(const NodePtr &writer);
NodePtr currentWriter = (writer->type() == AVRO_SYMBOLIC) ? writer->leafAt(0) : writer;
// Same table layout as in construct(), but with skipper builders.
static const BuilderFunc funcs[] = {
&ResolverFactory::constructPrimitiveSkipper<std::string>,
&ResolverFactory::constructPrimitiveSkipper<std::vector<uint8_t>>,
&ResolverFactory::constructPrimitiveSkipper<int32_t>,
&ResolverFactory::constructPrimitiveSkipper<int64_t>,
&ResolverFactory::constructPrimitiveSkipper<float>,
&ResolverFactory::constructPrimitiveSkipper<double>,
&ResolverFactory::constructPrimitiveSkipper<bool>,
&ResolverFactory::constructPrimitiveSkipper<Null>,
&ResolverFactory::constructCompoundSkipper<RecordSkipper>,
&ResolverFactory::constructCompoundSkipper<EnumSkipper>,
&ResolverFactory::constructCompoundSkipper<ArraySkipper>,
&ResolverFactory::constructCompoundSkipper<MapSkipper>,
&ResolverFactory::constructCompoundSkipper<UnionSkipper>,
&ResolverFactory::constructCompoundSkipper<FixedSkipper>};
static_assert((sizeof(funcs) / sizeof(BuilderFunc)) == (AVRO_NUM_TYPES),
"Invalid number of builder functions");
BuilderFunc func = funcs[currentWriter->type()];
assert(func);
return ((this)->*(func))(currentWriter);
}
};
// Builds one skipper per field of the writer record, in field order.
RecordSkipper::RecordSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {
    const size_t fieldCount = writer->leaves();
    resolvers_.reserve(fieldCount);
    for (size_t pos = 0; pos < fieldCount; ++pos) {
        resolvers_.push_back(factory.skipper(writer->leafAt(pos)));
    }
}
// Builds one resolver per writer field, in writer field order.
// A writer field whose name also exists in the reader record gets a resolver
// targeting the reader field's layout; any other writer field gets a skipper.
RecordParser::RecordParser(ResolverFactory &factory,
                           const NodePtr &writer,
                           const NodePtr &reader,
                           const CompoundLayout &offsets) : Resolver() {
    const size_t fieldCount = writer->leaves();
    resolvers_.reserve(fieldCount);

    for (size_t pos = 0; pos < fieldCount; ++pos) {
        const NodePtr &writerField = writer->leafAt(pos);
        const std::string &fieldName = writer->nameAt(pos);

        size_t readerPos = 0;
        if (!reader->nameIndex(fieldName, readerPos)) {
            // The reader does not know this field: consume and drop it.
            resolvers_.push_back(factory.skipper(writerField));
            continue;
        }

        const NodePtr &readerField = reader->leafAt(readerPos);
        resolvers_.push_back(factory.construct(writerField, readerField, offsets.at(readerPos)));
    }
}
// Builds the skipper for the map's value type, which is the writer's leaf 1.
MapSkipper::MapSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {
    resolver_ = factory.skipper(writer->leafAt(1));
}
// Builds the value resolver from leaf 1 of both schemas and the layout entry
// at index 1. The map itself sits at the layout's own offset and the setter
// function pointer at the offset of layout entry 0.
MapParser::MapParser(ResolverFactory &factory,
                     const NodePtr &writer,
                     const NodePtr &reader,
                     const CompoundLayout &offsets)
    : Resolver(),
      resolver_(factory.construct(writer->leafAt(1), reader->leafAt(1), offsets.at(1))),
      offset_(offsets.offset()),
      setFuncOffset_(offsets.at(0).offset()) {
}
// Builds the skipper for the array's item type, which is the writer's leaf 0.
ArraySkipper::ArraySkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {
    resolver_ = factory.skipper(writer->leafAt(0));
}
// Builds the item resolver from leaf 0 of both schemas and the layout entry
// at index 1. The array itself sits at the layout's own offset and the setter
// function pointer at the offset of layout entry 0.
ArrayParser::ArrayParser(ResolverFactory &factory,
                         const NodePtr &writer,
                         const NodePtr &reader,
                         const CompoundLayout &offsets)
    : Resolver(),
      resolver_(factory.construct(writer->leafAt(0), reader->leafAt(0), offsets.at(1))),
      offset_(offsets.offset()),
      setFuncOffset_(offsets.at(0).offset()) {
}
// Builds one skipper per branch of the writer union, in branch order.
UnionSkipper::UnionSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {
    const size_t branchCount = writer->leaves();
    resolvers_.reserve(branchCount);
    for (size_t pos = 0; pos < branchCount; ++pos) {
        resolvers_.push_back(factory.skipper(writer->leafAt(pos)));
    }
}
namespace {
// Finds the reader-union branch that `writer` resolves against.
// Assumes the writer is NOT a union, and the reader IS a union.
//
// Branches are tried in order. The first branch that resolves to
// RESOLVE_MATCH wins immediately. Otherwise the result and `index` are those
// of the first branch that resolved to anything other than RESOLVE_NO_MATCH;
// if no branch resolves at all, RESOLVE_NO_MATCH is returned and `index` is
// left at the last branch tried (0 for an empty union).
SchemaResolution
checkUnionMatch(const NodePtr &writer, const NodePtr &reader, size_t &index) {
    index = 0;
    SchemaResolution best = RESOLVE_NO_MATCH;

    const size_t branchCount = reader->leaves();
    for (size_t pos = 0; pos < branchCount; ++pos) {
        const SchemaResolution current = writer->resolve(*reader->leafAt(pos));

        if (current == RESOLVE_MATCH) {
            // An exact match beats any earlier promotable candidate.
            index = pos;
            return current;
        }
        if (best == RESOLVE_NO_MATCH) {
            best = current;
            index = pos;
        }
    }
    return best;
}
} // namespace
// Builds, for every writer branch, the resolver to run and the reader branch
// it maps to. The union value sits at the layout's own offset, the branch
// index at layout entry 0 and the setter at layout entry 1; the layouts of
// the reader branches start at entry 2.
UnionParser::UnionParser(ResolverFactory &factory,
                         const NodePtr &writer,
                         const NodePtr &reader,
                         const CompoundLayout &offsets) : Resolver(),
                                                          offset_(offsets.offset()),
                                                          choiceOffset_(offsets.at(0).offset()),
                                                          setFuncOffset_(offsets.at(1).offset()) {
    const size_t branchCount = writer->leaves();
    resolvers_.reserve(branchCount);
    choiceMapping_.reserve(branchCount);

    for (size_t pos = 0; pos < branchCount; ++pos) {
        const NodePtr &writerBranch = writer->leafAt(pos);

        size_t readerPos = 0;
        if (checkUnionMatch(writerBranch, reader, readerPos) != RESOLVE_NO_MATCH) {
            const NodePtr &readerBranch = reader->leafAt(readerPos);
            resolvers_.push_back(factory.construct(writerBranch, readerBranch, offsets.at(readerPos + 2)));
            choiceMapping_.push_back(readerPos);
        } else {
            // No reader branch fits: skip the data and record an index that
            // is not a valid reader branch.
            resolvers_.push_back(factory.skipper(writerBranch));
            choiceMapping_.push_back(reader->leaves());
        }
    }
}
// Picks the reader-union branch that the (non-union) writer resolves against
// and builds the resolver for that branch. The union value sits at the
// layout's own offset, the branch index at layout entry 0 and the setter at
// layout entry 1; the layouts of the reader branches start at entry 2.
NonUnionToUnionParser::NonUnionToUnionParser(ResolverFactory &factory,
                                             const NodePtr &writer,
                                             const NodePtr &reader,
                                             const CompoundLayout &offsets) : Resolver(),
                                                                              offset_(offsets.offset()),
                                                                              choice_(0),
                                                                              choiceOffset_(offsets.at(0).offset()),
                                                                              setFuncOffset_(offsets.at(1).offset()) {
    // checkUnionMatch() stores the chosen branch in choice_; its return value
    // is only inspected by the assertion.
    const SchemaResolution bestMatch = checkUnionMatch(writer, reader, choice_);
    assert(bestMatch != RESOLVE_NO_MATCH);
    (void) bestMatch; // keeps builds without assertions free of warnings

    const NodePtr &chosenBranch = reader->leafAt(choice_);
    resolver_ = factory.construct(writer, chosenBranch, offsets.at(choice_ + 2));
}
// Builds one resolver per writer branch; each one resolves that branch
// against the same non-union reader node and the same layout.
UnionToNonUnionParser::UnionToNonUnionParser(ResolverFactory &factory,
                                             const NodePtr &writer,
                                             const NodePtr &reader,
                                             const Layout &offsets) : Resolver() {
    const size_t branchCount = writer->leaves();
    resolvers_.reserve(branchCount);
    for (size_t pos = 0; pos < branchCount; ++pos) {
        resolvers_.push_back(factory.construct(writer->leafAt(pos), reader, offsets));
    }
}
// Entry point of this file: builds the resolver tree that reads data written
// with `writerSchema` into the `readerLayout` of a `readerSchema` value,
// starting at both schema roots.
unique_ptr<Resolver> constructResolver(const ValidSchema &writerSchema,
                                       const ValidSchema &readerSchema,
                                       const Layout &readerLayout) {
    ResolverFactory builder;
    unique_ptr<Resolver> root = builder.construct(writerSchema.root(), readerSchema.root(), readerLayout);
    return root;
}
} // namespace avro

View File

@ -1,35 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ResolverSchema.hh"
#include "Resolver.hh"
#include "ValidSchema.hh"
namespace avro {
// Builds the resolver for the given writer/reader schema pair and reader
// layout through constructResolver() and stores it in resolver_.
ResolverSchema::ResolverSchema(
const ValidSchema &writerSchema,
const ValidSchema &readerSchema,
const Layout &readerLayout) : resolver_(constructResolver(writerSchema, readerSchema, readerLayout)) {}
// Runs the stored resolver: reads from `reader` and writes the result
// relative to `address`.
void ResolverSchema::parse(Reader &reader, uint8_t *address) {
resolver_->parse(reader, address);
}
} // namespace avro

View File

@ -1,98 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <utility>
#include "Schema.hh"
namespace avro {
// Creates a record schema backed by a new NodeRecord named `name`.
RecordSchema::RecordSchema(const std::string &name) : Schema(new NodeRecord) {
node_->setName(Name(name));
}
// Appends a field called `name` whose type is the root node of `fieldSchema`.
void RecordSchema::addField(const std::string &name, const Schema &fieldSchema) {
// add the name first. it will throw if the name is a duplicate, preventing
// the leaf from being added
node_->addName(name);
node_->addLeaf(fieldSchema.root());
}
// Returns the doc string stored on the underlying node.
std::string RecordSchema::getDoc() const {
return node_->getDoc();
}
// Stores `doc` as the doc string of the underlying node.
void RecordSchema::setDoc(const std::string &doc) {
node_->setDoc(doc);
}
// Creates an enum schema backed by a new NodeEnum named `name`.
EnumSchema::EnumSchema(const std::string &name) : Schema(new NodeEnum) {
node_->setName(Name(name));
}
// Appends `symbol` to the enum by adding it as a name on the underlying node.
void EnumSchema::addSymbol(const std::string &symbol) {
node_->addName(symbol);
}
// Creates an array schema whose item type is the root node of `itemsSchema`.
ArraySchema::ArraySchema(const Schema &itemsSchema) : Schema(new NodeArray) {
node_->addLeaf(itemsSchema.root());
}
// Same as above for an ArraySchema argument.
// NOTE(review): although this has the signature of a copy constructor, it
// creates a new array node with the argument's root as its item type (an
// array of arrays) rather than copying the argument - confirm this is intended.
ArraySchema::ArraySchema(const ArraySchema &itemsSchema) : Schema(new NodeArray) {
node_->addLeaf(itemsSchema.root());
}
// Creates a map schema whose value type is the root node of `valuesSchema`,
// added as a leaf of a new NodeMap.
MapSchema::MapSchema(const Schema &valuesSchema) : Schema(new NodeMap) {
node_->addLeaf(valuesSchema.root());
}
// Same as above for a MapSchema argument.
// NOTE(review): although this has the signature of a copy constructor, it
// creates a new map node with the argument's root as its value type (a map
// of maps) rather than copying the argument - confirm this is intended.
MapSchema::MapSchema(const MapSchema &valuesSchema) : Schema(new NodeMap) {
node_->addLeaf(valuesSchema.root());
}
UnionSchema::UnionSchema() : Schema(new NodeUnion) {}
void UnionSchema::addType(const Schema &typeSchema) {
if (typeSchema.type() == AVRO_UNION) {
throw Exception("Cannot add unions to unions");
}
if (typeSchema.type() == AVRO_RECORD) {
// check for duplicate records
size_t types = node_->leaves();
for (size_t i = 0; i < types; ++i) {
const NodePtr &leaf = node_->leafAt(i);
// TODO, more checks?
if (leaf->type() == AVRO_RECORD && leaf->name() == typeSchema.root()->name()) {
throw Exception("Records in unions cannot have duplicate names");
}
}
}
node_->addLeaf(typeSchema.root());
}
// Creates a fixed schema backed by a new NodeFixed with the given byte size
// and name.
FixedSchema::FixedSchema(int size, const std::string &name) : Schema(new NodeFixed) {
node_->setFixedSize(size);
node_->setName(Name(name));
}
// Creates a symbolic schema backed by a new NodeSymbolic that carries `name`
// and refers to the node `link`.
SymbolicSchema::SymbolicSchema(const Name &name, const NodePtr &link) : Schema(new NodeSymbolic(HasName(name), link)) {
}
} // namespace avro

View File

@ -1,189 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Stream.hh"
#include <vector>
namespace avro {
using std::vector;
// InputStream over a sequence of memory chunks (in this file it is built from
// the chunk list of a MemoryOutputStream). It keeps a REFERENCE to the chunk
// vector, so that vector must outlive the stream.
class MemoryInputStream : public InputStream {
// Chunk pointers; every chunk is read as chunkSize_ bytes except the last one,
// which is read as available_ bytes.
const std::vector<uint8_t *> &data_;
const size_t chunkSize_;
// Number of chunks (b.size() at construction).
const size_t size_;
// Readable bytes in the last chunk.
const size_t available_;
// Index of the chunk currently being read.
size_t cur_;
// Bytes of the current chunk already handed out.
size_t curLen_;
// Returns the readable length of the current chunk. If the current chunk is
// fully consumed it first advances to the next chunk (resetting curLen_), or
// returns 0 when the last chunk is exhausted. NOTE: this mutates cur_/curLen_.
size_t maxLen() {
size_t n = (cur_ == (size_ - 1)) ? available_ : chunkSize_;
if (n == curLen_) {
if (cur_ == (size_ - 1)) {
return 0;
}
++cur_;
n = (cur_ == (size_ - 1)) ? available_ : chunkSize_;
curLen_ = 0;
}
return n;
}
public:
MemoryInputStream(const std::vector<uint8_t *> &b,
size_t chunkSize, size_t available) : data_(b), chunkSize_(chunkSize), size_(b.size()),
available_(available), cur_(0), curLen_(0) {}
// Hands out the unread remainder of the current chunk and marks it consumed.
// Returns false when no data is left.
bool next(const uint8_t **data, size_t *len) final {
if (size_t n = maxLen()) {
*data = data_[cur_] + curLen_;
*len = n - curLen_;
curLen_ = n;
return true;
}
return false;
}
// Un-reads len bytes within the current chunk; no bounds check is performed.
void backup(size_t len) final {
curLen_ -= len;
}
// Advances by up to len bytes, crossing chunk boundaries as needed; stops
// early when the data runs out.
void skip(size_t len) final {
while (len > 0) {
if (size_t n = maxLen()) {
// Clamp the step to what is still wanted inside this chunk.
if ((curLen_ + len) < n) {
n = curLen_ + len;
}
len -= n - curLen_;
curLen_ = n;
} else {
break;
}
}
}
// Bytes consumed so far: full chunks before cur_ plus the position in cur_.
size_t byteCount() const final {
return cur_ * chunkSize_ + curLen_;
}
};
// InputStream over one contiguous, caller-owned buffer. The buffer is neither
// copied nor freed by this class.
class MemoryInputStream2 : public InputStream {
    const uint8_t *const data_;
    const size_t size_;
    size_t curLen_; // bytes already handed out

public:
    MemoryInputStream2(const uint8_t *data, size_t len)
        : data_(data), size_(len), curLen_(0) {}

    // Hands out everything that has not been read yet in a single piece.
    // Returns false once the buffer is exhausted.
    bool next(const uint8_t **data, size_t *len) final {
        const size_t remaining = size_ - curLen_;
        if (remaining == 0) {
            return false;
        }
        *data = data_ + curLen_;
        *len = remaining;
        curLen_ = size_;
        return true;
    }

    // Un-reads len bytes; no bounds check is performed.
    void backup(size_t len) final {
        curLen_ -= len;
    }

    // Advances by len bytes, clamped to the end of the buffer.
    void skip(size_t len) final {
        const size_t remaining = size_ - curLen_;
        curLen_ += (len > remaining) ? remaining : len;
    }

    // Number of bytes consumed so far.
    size_t byteCount() const final {
        return curLen_;
    }
};
// OutputStream that stores written data in heap-allocated chunks of a fixed
// size. Members are public because the free functions in this file read them.
class MemoryOutputStream : public OutputStream {
public:
    const size_t chunkSize_;
    std::vector<uint8_t *> data_; // owned chunks, released in the destructor
    size_t available_;            // unused bytes at the end of the last chunk
    size_t byteCount_;            // bytes handed out so far, net of backup()

    explicit MemoryOutputStream(size_t chunkSize) : chunkSize_(chunkSize),
                                                    available_(0), byteCount_(0) {}

    ~MemoryOutputStream() final {
        for (uint8_t *chunk : data_) {
            delete[] chunk;
        }
    }

    // Hands out the unused tail of the last chunk, allocating a new chunk first
    // when none is left. Always returns true.
    bool next(uint8_t **data, size_t *len) final {
        if (available_ == 0) {
            data_.push_back(new uint8_t[chunkSize_]);
            available_ = chunkSize_;
        }
        const size_t used = chunkSize_ - available_;
        *data = data_.back() + used;
        *len = available_;
        byteCount_ += available_;
        available_ = 0;
        return true;
    }

    // Returns len bytes of the most recently handed-out region to the stream.
    void backup(size_t len) final {
        available_ += len;
        byteCount_ -= len;
    }

    uint64_t byteCount() const final {
        return byteCount_;
    }

    // Nothing to flush: data already lives in memory.
    void flush() final {}
};
// Creates an in-memory output stream that grows in chunks of chunkSize bytes.
std::unique_ptr<OutputStream> memoryOutputStream(size_t chunkSize) {
    std::unique_ptr<OutputStream> stream(new MemoryOutputStream(chunkSize));
    return stream;
}

// Creates an input stream over a caller-owned buffer of len bytes.
std::unique_ptr<InputStream> memoryInputStream(const uint8_t *data, size_t len) {
    std::unique_ptr<InputStream> stream(new MemoryInputStream2(data, len));
    return stream;
}

// Creates an input stream that reads what has been written to `source`.
// `source` must be a MemoryOutputStream; the reference dynamic_cast throws
// std::bad_cast otherwise. The result refers to the chunks of `source`.
std::unique_ptr<InputStream> memoryInputStream(const OutputStream &source) {
    const auto &mos = dynamic_cast<const MemoryOutputStream &>(source);
    if (mos.data_.empty()) {
        return std::unique_ptr<InputStream>(new MemoryInputStream2(nullptr, 0));
    }
    const size_t usedInLastChunk = mos.chunkSize_ - mos.available_;
    return std::unique_ptr<InputStream>(
        new MemoryInputStream(mos.data_, mos.chunkSize_, usedInLastChunk));
}
// Copies the bytes written to `source` so far into one contiguous vector.
// `source` must be a MemoryOutputStream; the reference dynamic_cast throws
// std::bad_cast otherwise.
std::shared_ptr<std::vector<uint8_t>> snapshot(const OutputStream &source) {
    const auto &mos = dynamic_cast<const MemoryOutputStream &>(source);
    auto result = std::make_shared<std::vector<uint8_t>>();
    result->reserve(mos.byteCount_);

    size_t remaining = mos.byteCount_;
    for (const uint8_t *chunk : mos.data_) {
        // Every chunk is full except possibly the last one.
        const size_t n = std::min(remaining, mos.chunkSize_);
        result->insert(result->end(), chunk, chunk + n);
        remaining -= n;
    }
    return result;
}
} // namespace avro

View File

@ -1,74 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Types.hh"
#include <iostream>
#include <string>
namespace avro {
namespace strings {
// Display names for the Avro types. The table is indexed directly by the Type
// value (see toString / operator<< below), so the order of entries must follow
// the order of the Type enumerators.
const std::string typeToString[] = {
"string",
"bytes",
"int",
"long",
"float",
"double",
"boolean",
"null",
"record",
"enum",
"array",
"map",
"union",
"fixed",
"symbolic"};
// Only the entry count is verified here (AVRO_NUM_TYPES plus one extra entry);
// the ordering is not checked.
static_assert((sizeof(typeToString) / sizeof(std::string)) == (AVRO_NUM_TYPES + 1),
"Incorrect Avro typeToString");
} // namespace strings
// this static assert exists because a 32 bit integer is used as a bit-flag for each type,
// and it would be a problem for this flag if we ever supported more than 32 types
static_assert(AVRO_NUM_TYPES < 32, "Too many Avro types");
// Returns the display name of `type`, or "Undefined type" when the value is not
// accepted by isAvroTypeOrPseudoType. The returned reference has static lifetime.
const std::string &toString(Type type) noexcept {
    static std::string undefinedType = "Undefined type";
    if (!isAvroTypeOrPseudoType(type)) {
        return undefinedType;
    }
    return strings::typeToString[type];
}
// Streams the display name of `type`; values not accepted by
// isAvroTypeOrPseudoType are streamed as their integer value instead.
std::ostream &operator<<(std::ostream &os, Type type) {
    if (!isAvroTypeOrPseudoType(type)) {
        return os << static_cast<int>(type);
    }
    return os << strings::typeToString[type];
}

// Streams a fixed placeholder for the Avro null value.
std::ostream &operator<<(std::ostream &os, const Null &) {
    return os << "(null value)";
}
} // namespace avro

View File

@ -1,181 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <boost/format.hpp>
#include <cctype>
#include <sstream>
#include <utility>
#include "Node.hh"
#include "Schema.hh"
#include "ValidSchema.hh"
using boost::format;
using std::make_pair;
using std::ostringstream;
using std::shared_ptr;
using std::static_pointer_cast;
using std::string;
namespace avro {
using SymbolMap = std::map<Name, NodePtr>;
static bool validate(const NodePtr &node, SymbolMap &symbolMap) {
if (!node->isValid()) {
throw Exception(format("Schema is invalid, due to bad node of type %1%")
% node->type());
}
if (node->hasName()) {
const Name &nm = node->name();
// FIXME: replace "find" with "lower_bound". The author seems to have intended
// "lower_bound" here because of (1) the check for the contents of the iterator
// that follows and (2) use of the iterator in insert later in the code.
auto it = symbolMap.find(nm);
auto found = it != symbolMap.end() && nm == it->first;
if (node->type() == AVRO_SYMBOLIC) {
if (!found) {
throw Exception(format("Symbolic name \"%1%\" is unknown") % node->name());
}
shared_ptr<NodeSymbolic> symNode =
static_pointer_cast<NodeSymbolic>(node);
// if the symbolic link is already resolved, we return true,
// otherwise returning false will force it to be resolved
return symNode->isSet();
}
if (found) {
return false;
}
symbolMap.insert(it, make_pair(nm, node));
}
node->lock();
auto leaves = node->leaves();
for (auto i = 0; i < leaves; ++i) {
const NodePtr &leaf(node->leafAt(i));
if (!validate(leaf, symbolMap)) {
// if validate returns false it means a node with this name already
// existed in the map, instead of keeping this node twice in the
// map (which could potentially create circular shared pointer
// links that would not be freed), replace this node with a
// symbolic link to the original one.
node->setLeafToSymbolic(i, symbolMap.find(leaf->name())->second);
}
}
return true;
}
// Validates a whole schema tree, starting with an empty symbol table.
static void validate(const NodePtr &p) {
SymbolMap m;
validate(p, m);
}
// Takes ownership of the given root node and validates it (may throw).
ValidSchema::ValidSchema(NodePtr root) : root_(std::move(root)) {
validate(root_);
}
// Uses the root node of `schema` and validates it (may throw).
ValidSchema::ValidSchema(const Schema &schema) : root_(schema.root()) {
validate(root_);
}
// Default schema: the root of a NullSchema.
ValidSchema::ValidSchema() : root_(NullSchema().root()) {
validate(root_);
}
// Replaces the root with the root of `schema`, then validates it. Note that
// root_ is assigned before validation runs.
void ValidSchema::setSchema(const Schema &schema) {
root_ = schema.root();
validate(root_);
}
// Writes the schema as JSON to `os`, followed by a newline.
void ValidSchema::toJson(std::ostream &os) const {
    root_->printJson(os, 0);
    os << '\n';
}

// Returns the schema as a JSON string. When prettyPrint is false the output
// is passed through compactSchema before it is returned.
string
ValidSchema::toJson(bool prettyPrint) const {
    ostringstream buffer;
    toJson(buffer);
    if (prettyPrint) {
        return buffer.str();
    }
    return compactSchema(buffer.str());
}

// Writes the root node's basic info listing to `os`.
void ValidSchema::toFlatList(std::ostream &os) const {
    root_->printBasicInfo(os);
}
/*
 * compactSchema compacts and returns a formatted string representation
 * of a ValidSchema object by removing the whitespaces outside of the quoted
 * field names and values. It can handle the cases where the quoted value is
 * in UTF-8 format. Note that this method is not responsible for validating
 * the schema.
 *
 * Throws Exception when the input ends inside an unterminated quote.
 */
string ValidSchema::compactSchema(const string &schema) {
    auto insideQuote = false;
    size_t newPos = 0;
    string data = schema;

    for (auto c : schema) {
        // std::isspace requires a value representable as unsigned char (or
        // EOF); a raw UTF-8 byte held in a signed char would be negative and
        // invoke undefined behaviour, hence the cast.
        if (!insideQuote && std::isspace(static_cast<unsigned char>(c))) {
            // Skip the white spaces outside quotes.
            continue;
        }

        if (c == '\"') {
            // It is valid for a quote to be part of the value for some fields,
            // e.g., the "doc" field. In that case, the quote is expected to be
            // escaped inside the schema. Since the escape character '\\' could
            // be escaped itself, we need to check whether there are an even
            // number of consecutive slashes prior to the quote.
            // The scan walks backwards over the already-compacted output and
            // stays in size_t so long inputs are not truncated to int.
            size_t leadingSlashes = 0;
            for (size_t i = newPos; i > 0 && data[i - 1] == '\\'; --i) {
                ++leadingSlashes;
            }
            if (leadingSlashes % 2 == 0) {
                // Found a real quote which identifies either the start or the
                // end of a field name or value.
                insideQuote = !insideQuote;
            }
        }
        data[newPos++] = c;
    }

    if (insideQuote) {
        throw Exception("Schema is not well formed with mismatched quotes");
    }

    if (newPos < schema.size()) {
        data.resize(newPos);
    }
    return data;
}
} // namespace avro

View File

@ -1,256 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <utility>
#include "NodeImpl.hh"
#include "ValidSchema.hh"
#include "Validator.hh"
namespace avro {
// Takes the schema by value, initialises the state machine and primes it with
// the schema's root node so the first expected type is known.
Validator::Validator(ValidSchema schema) : schema_(std::move(schema)),
nextType_(AVRO_NULL),
expectedTypesFlag_(0),
compoundStarted_(false),
waitingForCount_(false),
count_(0) {
setupOperation(schema_.root());
}
// Puts the validator into "expecting a count" mode: the count is reset and the
// next value must be an int or a long (nextType_ is reported as long).
void Validator::setWaitingForCount() {
waitingForCount_ = true;
count_ = 0;
expectedTypesFlag_ = typeToFlag(AVRO_INT) | typeToFlag(AVRO_LONG);
nextType_ = AVRO_LONG;
}
// Advance step for an enum on top of the compound stack: the first call asks
// for the integer value; the following call pops the enum.
void Validator::enumAdvance() {
    if (!compoundStarted_) {
        waitingForCount_ = false;
        compoundStack_.pop_back();
        return;
    }
    setWaitingForCount();
    compoundStarted_ = false;
}
// Handles the count bookkeeping for counted compounds.
// Returns true when the caller should go on to process the next leaf, false
// when this step only requested the count or closed an empty compound.
bool Validator::countingSetup() {
    if (compoundStarted_) {
        // First visit: ask for the count before anything else.
        setWaitingForCount();
        compoundStarted_ = false;
        return false;
    }

    if (!waitingForCount_) {
        return true;
    }

    // The count has just been supplied.
    waitingForCount_ = false;
    if (count_ == 0) {
        compoundStack_.pop_back();
        return false;
    }
    counters_.push_back(static_cast<size_t>(count_));
    return true;
}
// Advance step for counted compounds (doAdvance maps record, array and map to
// this function). Walks the leaves of the node on top of the compound stack
// once per counted item.
void Validator::countingAdvance() {
if (countingSetup()) {
// Take the current leaf position and post-increment it for the next call.
auto index = (compoundStack_.back().pos)++;
const NodePtr &node = compoundStack_.back().node;
if (index < node->leaves()) {
setupOperation(node->leafAt(index));
} else {
// All leaves of one item were visited: rewind and count the item off.
compoundStack_.back().pos = 0;
int count = --counters_.back();
if (count == 0) {
// Last item of this batch done: restart the compound so that the next
// step asks for a new count.
counters_.pop_back();
compoundStarted_ = true;
nextType_ = node->type();
expectedTypesFlag_ = typeToFlag(nextType_);
} else {
// More items remain: begin the next one with its first leaf.
index = (compoundStack_.back().pos)++;
setupOperation(node->leafAt(index));
}
}
}
}
// Advance step for a union on top of the compound stack: the first call asks
// for the branch index; the following call replaces the union with the
// selected branch.
// Throws Exception when the supplied index is not below the branch count.
void Validator::unionAdvance() {
    if (compoundStarted_) {
        setWaitingForCount();
        compoundStarted_ = false;
        return;
    }

    waitingForCount_ = false;
    // Copy the pointer: the stack entry holding it is popped below.
    NodePtr node = compoundStack_.back().node;
    if (!(count_ < static_cast<int64_t>(node->leaves()))) {
        throw Exception(
            boost::format("Union selection out of range, got %1%,"
                          " expecting 0-%2%")
            % count_ % (node->leaves() - 1));
    }
    compoundStack_.pop_back();
    setupOperation(node->leafAt(static_cast<int>(count_)));
}
// Advance step for a fixed on top of the compound stack: it is popped at once.
void Validator::fixedAdvance() {
compoundStarted_ = false;
compoundStack_.pop_back();
}
// Returns the fixed size reported by the node on top of the compound stack.
// The stack must not be empty; no check is performed here.
int Validator::nextSizeExpected() const {
return compoundStack_.back().node->fixedSize();
}
// Drives the state machine: repeatedly runs the advance function of the
// compound on top of the stack until an expected type has been set or the
// stack is empty.
void Validator::doAdvance() {
using AdvanceFunc = void (Validator::*)();
// only the compound types need advance functions here
// The table is indexed by the Type value of the node (see funcs[type] below),
// so its order must follow the order of the Type enumerators.
static const AdvanceFunc funcs[] = {
nullptr, // string
nullptr, // bytes
nullptr, // int
nullptr, // long
nullptr, // float
nullptr, // double
nullptr, // bool
nullptr, // null
&Validator::countingAdvance, // Record is treated like counting with count == 1
&Validator::enumAdvance,
&Validator::countingAdvance,
&Validator::countingAdvance,
&Validator::unionAdvance,
&Validator::fixedAdvance};
static_assert((sizeof(funcs) / sizeof(AdvanceFunc)) == (AVRO_NUM_TYPES),
"Invalid number of advance functions");
expectedTypesFlag_ = 0;
// loop until we encounter a next expected type, or we've exited all compound types
while (!expectedTypesFlag_ && !compoundStack_.empty()) {
Type type = compoundStack_.back().node->type();
AdvanceFunc func = funcs[type];
// only compound functions are put on the status stack so it is ok to
// assume that func is not null
assert(func);
((this)->*(func))();
}
// Nothing left to validate: report null as the next type.
if (compoundStack_.empty()) {
nextType_ = AVRO_NULL;
}
}
// Advances after a value was consumed. Does nothing while a count is pending;
// in that state setCount() performs the advance.
void Validator::advance() {
if (!waitingForCount_) {
doAdvance();
}
}
// Supplies the count (or union branch index) the validator is waiting for and
// advances the state machine.
// Throws Exception when no count is expected or when `count` is negative.
void Validator::setCount(int64_t count) {
    if (!waitingForCount_) {
        throw Exception("Not expecting count");
    }
    // Validate the incoming argument. The previous code tested the member
    // count_, which setWaitingForCount() has just reset to 0, so negative
    // input was never rejected.
    if (count < 0) {
        throw Exception("Count cannot be negative");
    }
    count_ = count;
    doAdvance();
}
// Sets expectedTypesFlag_ to the set of value types acceptable where the
// schema declares `type`.
void Validator::setupFlag(Type type) {
// use flags instead of strictly types, so that we can be more lax about the type
// (for example, a long should be able to accept an int type, but not vice versa)
// The table is indexed by the Type value (flags[type] below), so its order
// must follow the order of the Type enumerators. The first two entries let
// string and bytes accept each other.
static const flag_t flags[] = {
typeToFlag(AVRO_STRING) | typeToFlag(AVRO_BYTES),
typeToFlag(AVRO_STRING) | typeToFlag(AVRO_BYTES),
typeToFlag(AVRO_INT),
typeToFlag(AVRO_INT) | typeToFlag(AVRO_LONG),
typeToFlag(AVRO_FLOAT),
typeToFlag(AVRO_DOUBLE),
typeToFlag(AVRO_BOOL),
typeToFlag(AVRO_NULL),
typeToFlag(AVRO_RECORD),
typeToFlag(AVRO_ENUM),
typeToFlag(AVRO_ARRAY),
typeToFlag(AVRO_MAP),
typeToFlag(AVRO_UNION),
typeToFlag(AVRO_FIXED)};
static_assert((sizeof(flags) / sizeof(flag_t)) == (AVRO_NUM_TYPES),
"Invalid number of avro type flags");
expectedTypesFlag_ = flags[type];
}
void Validator::setupOperation(const NodePtr &node) {
nextType_ = node->type();
if (nextType_ == AVRO_SYMBOLIC) {
NodePtr actualNode = resolveSymbol(node);
assert(actualNode);
setupOperation(actualNode);
return;
}
assert(nextType_ < AVRO_SYMBOLIC);
setupFlag(nextType_);
if (!isPrimitive(nextType_)) {
compoundStack_.emplace_back(node);
compoundStarted_ = true;
}
}
// Writes the simple name of the record currently being processed into `name`.
// Returns false (and leaves `name` empty) when the relevant stack entry does
// not exist or is not a record.
bool Validator::getCurrentRecordName(std::string &name) const {
    name.clear();

    // if the top of the stack is a record I want this record name
    const bool useTop = !compoundStack_.empty()
        && (isPrimitive(nextType_) || nextType_ == AVRO_RECORD);
    const size_t offsetFromEnd = useTop ? 1 : 2;
    // The unsigned subtraction may wrap; the cast to int turns that into a
    // negative index, which is rejected below.
    const auto idx = static_cast<int>(compoundStack_.size() - offsetFromEnd);

    if (idx < 0 || compoundStack_[idx].node->type() != AVRO_RECORD) {
        return false;
    }
    name = compoundStack_[idx].node->name().simpleName();
    return true;
}
bool Validator::getNextFieldName(std::string &name) const {
auto found = false;
name.clear();
auto idx = static_cast<int>(compoundStack_.size() - (isCompound(nextType_) ? 2 : 1));
if (idx >= 0 && compoundStack_[idx].node->type() == AVRO_RECORD) {
size_t pos = compoundStack_[idx].pos - 1;
const NodePtr &node = compoundStack_[idx].node;
if (pos < node->leaves()) {
name = node->nameAt(pos);
found = true;
}
}
return found;
}
} // namespace avro

View File

@ -1,57 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Zigzag.hh"
namespace avro {
namespace {

// Writes the already zigzag-encoded value `val` into `output` using the
// variable-length encoding: seven payload bits per byte, least significant
// group first, with the high bit set on every byte except the last.
// Returns the number of bytes written.
template<typename UInt, size_t N>
size_t encodeVarint(UInt val, std::array<uint8_t, N> &output) noexcept {
    const UInt mask = 0x7F;
    auto v = val & mask;
    size_t bytesOut = 0;
    while (val >>= 7) {
        output[bytesOut++] = static_cast<uint8_t>(v | 0x80);
        v = val & mask;
    }
    output[bytesOut++] = static_cast<uint8_t>(v);
    return bytesOut;
}

} // namespace

// Encodes a 64-bit integer as a zigzag varint into `output`.
// Returns the number of bytes used.
size_t
encodeInt64(int64_t input, std::array<uint8_t, 10> &output) noexcept {
    return encodeVarint(encodeZigzag64(input), output);
}

// Encodes a 32-bit integer as a zigzag varint into `output`.
// Returns the number of bytes used.
size_t
encodeInt32(int32_t input, std::array<uint8_t, 5> &output) noexcept {
    return encodeVarint(encodeZigzag32(input), output);
}
} // namespace avro

View File

@ -1,859 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cctype>
#ifndef _WIN32
#include <ctime>
#endif
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <utility>
#include <boost/algorithm/string_regex.hpp>
#include "Compiler.hh"
#include "NodeImpl.hh"
#include "ValidSchema.hh"
using avro::NodePtr;
using avro::resolveSymbol;
using std::ifstream;
using std::map;
using std::ofstream;
using std::ostream;
using std::set;
using std::string;
using std::vector;
using boost::lexical_cast;
using avro::compileJsonSchema;
using avro::ValidSchema;
#if __cplusplus >= 201703L
#define ANY_NS "std"
#else
#define ANY_NS "boost"
#endif
// Describes one union accessor pair (get_<name> / set_<name>) whose inline
// definition is emitted later.
struct PendingSetterGetter {
    string structName; // union struct the accessors belong to
    string type;       // C++ type of the branch value
    string name;       // suffix used in get_/set_
    size_t idx;        // branch index within the union

    PendingSetterGetter(string owner, string valueType, string accessor, size_t branch)
        : structName(std::move(owner)),
          type(std::move(valueType)),
          name(std::move(accessor)),
          idx(branch) {}
};
// Describes one union default constructor whose inline definition is emitted
// later.
struct PendingConstructor {
    string structName; // union struct to construct
    string memberName; // text placed inside value_(...()) when initMember is set
    bool initMember;   // whether value_ is initialised at all

    PendingConstructor(string owner, string member, bool init)
        : structName(std::move(owner)),
          memberName(std::move(member)),
          initMember(init) {}
};
// Generates C++ source for an Avro schema and writes it to an output stream.
// NOTE: members are initialised in declaration order, so the order of the
// fields below must stay in step with the constructor's initialiser list.
class CodeGen {
// Running counter that makes generated union struct names unique (see unionName).
size_t unionNumber_;
// Destination of all generated code.
std::ostream &os_;
// Read by cppTypeOf to decide whether record/enum names need namespace qualification.
bool inNamespace_;
// Namespace for generated types; empty means none (see fullname).
const std::string ns_;
// Path of the schema file; its base name seeds the union struct names.
const std::string schemaFile_;
// NOTE(review): headerFile_, includePrefix_, guardString_ and random_ are not
// used in the portion of the file visible here; presumably they serve the
// include guard / header emission. Confirm against the rest of the file.
const std::string headerFile_;
const std::string includePrefix_;
// When true, record generation emits no per-field `<name>_t` typedefs for unions.
const bool noUnion_;
const std::string guardString_;
boost::mt19937 random_;
// Accessors and constructors of union structs that were declared but whose
// inline definitions have not been emitted yet.
vector<PendingSetterGetter> pendingGettersAndSetters;
vector<PendingConstructor> pendingConstructors;
// Type name already generated for each schema node.
map<NodePtr, string> done;
// Nodes whose generation is currently in progress (recursion guard).
set<NodePtr> doing;
std::string guard();
std::string fullname(const string &name) const;
std::string generateEnumType(const NodePtr &n);
std::string cppTypeOf(const NodePtr &n);
std::string generateRecordType(const NodePtr &n);
std::string unionName();
std::string generateUnionType(const NodePtr &n);
std::string generateType(const NodePtr &n);
std::string generateDeclaration(const NodePtr &n);
std::string doGenerateType(const NodePtr &n);
void generateEnumTraits(const NodePtr &n);
void generateTraits(const NodePtr &n);
void generateRecordTraits(const NodePtr &n);
void generateUnionTraits(const NodePtr &n);
void emitCopyright();
public:
// Note the parameter order: guardString comes before includePrefix, unlike the
// member declaration order. random_ is seeded from the current time.
CodeGen(std::ostream &os, std::string ns,
std::string schemaFile, std::string headerFile,
std::string guardString,
std::string includePrefix, bool noUnion) : unionNumber_(0), os_(os), inNamespace_(false), ns_(std::move(ns)),
schemaFile_(std::move(schemaFile)), headerFile_(std::move(headerFile)),
includePrefix_(std::move(includePrefix)), noUnion_(noUnion),
guardString_(std::move(guardString)),
random_(static_cast<uint32_t>(::time(nullptr))) {}
// Entry point: generates code for the given schema.
void generate(const ValidSchema &schema);
};
// Returns `name` unchanged unless it is a C++ reserved word, in which case a
// trailing underscore is appended so it can be used as an identifier.
static string decorate(const std::string &name) {
    static const char *cppReservedWords[] = {
        "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break",
        "case", "catch", "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept",
        "const", "consteval", "constexpr", "const_cast", "continue", "co_await", "co_return",
        "co_yield", "decltype", "default", "delete", "do", "double", "dynamic_cast", "else",
        "enum", "explicit", "export", "extern", "false", "float", "for", "friend", "goto", "if",
        "import", "inline", "int", "long", "module", "mutable", "namespace", "new", "noexcept", "not",
        "not_eq", "nullptr", "operator", "or", "or_eq", "private", "protected", "public", "reflexpr",
        "register", "reinterpret_cast", "requires", "return", "short", "signed", "sizeof", "static",
        "static_assert", "static_cast", "struct", "switch", "synchronized", "template", "this",
        "thread_local", "throw", "true", "try", "typedef", "typeid", "typename", "union", "unsigned",
        "using", "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq"};

    const char *candidate = name.c_str();
    const size_t wordCount = sizeof(cppReservedWords) / sizeof(cppReservedWords[0]);
    for (size_t i = 0; i < wordCount; ++i) {
        if (strcmp(candidate, cppReservedWords[i]) == 0) {
            return name + '_';
        }
    }
    return name;
}
// Overload for schema names: decorates the simple (unqualified) name.
static string decorate(const avro::Name &name) {
return decorate(name.simpleName());
}
// Qualifies `name` with the configured namespace, or returns it unchanged when
// no namespace was configured.
string CodeGen::fullname(const string &name) const {
return ns_.empty() ? name : (ns_ + "::" + name);
}
// Emits an `enum class` definition for the enum node and returns its
// (decorated) type name. Symbols that are reserved words are decorated too.
string CodeGen::generateEnumType(const NodePtr &n) {
    string enumName = decorate(n->name());

    os_ << "enum class " << enumName << ": unsigned {\n";
    for (size_t i = 0, symbolCount = n->names(); i < symbolCount; ++i) {
        os_ << " " << decorate(n->nameAt(i)) << ",\n";
    }
    os_ << "};\n\n";

    return enumName;
}
// Returns the C++ type spelling used in generated code for the schema node.
// Nothing is emitted here; unions rely on a name recorded earlier in `done`.
string CodeGen::cppTypeOf(const NodePtr &n) {
switch (n->type()) {
case avro::AVRO_STRING:
return "std::string";
case avro::AVRO_BYTES:
return "std::vector<uint8_t>";
case avro::AVRO_INT:
return "int32_t";
case avro::AVRO_LONG:
return "int64_t";
case avro::AVRO_FLOAT:
return "float";
case avro::AVRO_DOUBLE:
return "double";
case avro::AVRO_BOOL:
return "bool";
case avro::AVRO_RECORD:
case avro::AVRO_ENUM: {
// Named types are namespace-qualified unless inNamespace_ is set.
string nm = decorate(n->name());
return inNamespace_ ? nm : fullname(nm);
}
case avro::AVRO_ARRAY:
return "std::vector<" + cppTypeOf(n->leafAt(0)) + " >";
case avro::AVRO_MAP:
// The value type of a map is taken from leaf 1.
return "std::map<std::string, " + cppTypeOf(n->leafAt(1)) + " >";
case avro::AVRO_FIXED:
return "std::array<uint8_t, " + lexical_cast<string>(n->fixedSize()) + ">";
case avro::AVRO_SYMBOLIC:
return cppTypeOf(resolveSymbol(n));
case avro::AVRO_UNION:
// NOTE: operator[] inserts an empty name if the union has not been
// generated yet.
return fullname(done[n]);
case avro::AVRO_NULL:
return "avro::null";
default:
return "$Undefined$";
}
}
// Returns a short identifier fragment for the node's type; generateUnionType
// uses it to build accessor names (get_<name> / set_<name>). Unions have no
// case here and fall through to "$Undefined$".
static string cppNameOf(const NodePtr &n) {
switch (n->type()) {
case avro::AVRO_NULL:
return "null";
case avro::AVRO_STRING:
return "string";
case avro::AVRO_BYTES:
return "bytes";
case avro::AVRO_INT:
return "int";
case avro::AVRO_LONG:
return "long";
case avro::AVRO_FLOAT:
return "float";
case avro::AVRO_DOUBLE:
return "double";
case avro::AVRO_BOOL:
return "bool";
case avro::AVRO_RECORD:
case avro::AVRO_ENUM:
case avro::AVRO_FIXED:
// Named types use their own (decorated) simple name.
return decorate(n->name());
case avro::AVRO_ARRAY:
return "array";
case avro::AVRO_MAP:
return "map";
case avro::AVRO_SYMBOLIC:
return cppNameOf(resolveSymbol(n));
default:
return "$Undefined$";
}
}
// Emits a struct definition for the record node and returns its (decorated)
// type name.
//
// The field types are generated first. If the record is found in `done` after
// that, its recorded name is returned and nothing more is emitted.
//
// Unless noUnion_ is set, every union field gets a `<field>_t` typedef inside
// the struct, and the member and constructor initialiser use that typedef.
// All three places now use the DECORATED field name. Before, the typedef used
// the raw name, so a union field named after a reserved word (e.g. `class`)
// produced `typedef ... class_t;` next to a member of type `class__t`, which
// does not compile.
string CodeGen::generateRecordType(const NodePtr &n) {
    size_t c = n->leaves();
    string decoratedName = decorate(n->name());

    vector<string> types;
    for (size_t i = 0; i < c; ++i) {
        types.push_back(generateType(n->leafAt(i)));
    }

    map<NodePtr, string>::const_iterator it = done.find(n);
    if (it != done.end()) {
        return it->second;
    }

    // Compute the identifier and emitted type of each member once, so the
    // typedef, the member declaration and the constructor cannot disagree.
    vector<string> memberNames;
    vector<string> memberTypes;
    for (size_t i = 0; i < c; ++i) {
        // the nameAt(i) does not take c++ reserved words into account
        // so we need to call decorate on it
        const string memberName = decorate(n->nameAt(i));
        const bool viaTypedef = !noUnion_ && n->leafAt(i)->type() == avro::AVRO_UNION;
        memberTypes.push_back(viaTypedef ? memberName + "_t" : types[i]);
        memberNames.push_back(memberName);
    }

    os_ << "struct " << decoratedName << " {\n";

    // Typedefs for union members.
    if (!noUnion_) {
        for (size_t i = 0; i < c; ++i) {
            if (n->leafAt(i)->type() == avro::AVRO_UNION) {
                os_ << " typedef " << types[i]
                    << ' ' << memberTypes[i] << ";\n";
            }
        }
    }

    // Member declarations.
    for (size_t i = 0; i < c; ++i) {
        os_ << " " << memberTypes[i];
        os_ << ' ' << memberNames[i] << ";\n";
    }

    // Default constructor that value-initialises every member.
    os_ << " " << decoratedName << "()";
    if (c > 0) {
        os_ << " :";
    }
    os_ << "\n";
    for (size_t i = 0; i < c; ++i) {
        os_ << " " << memberNames[i] << "(";
        os_ << memberTypes[i];
        os_ << "())";
        if (i != (c - 1)) {
            os_ << ',';
        }
        os_ << "\n";
    }
    os_ << " { }\n";
    os_ << "};\n\n";
    return decoratedName;
}
// Rewrites `s` in place so it only contains characters usable in an
// identifier: letters are kept (upper-cased when foldCase is true), digits are
// kept, and every other character becomes '_'.
void makeCanonical(string &s, bool foldCase) {
    for (char &c : s) {
        // The <cctype> functions require a value representable as unsigned
        // char (or EOF). A plain char may be negative for bytes >= 0x80, which
        // is undefined behaviour, so classify the unsigned value instead.
        const auto uc = static_cast<unsigned char>(c);
        if (isalpha(uc)) {
            if (foldCase) {
                c = static_cast<char>(toupper(uc));
            }
        } else if (!isdigit(uc)) {
            c = '_';
        }
    }
}
// Builds a unique struct name for the next generated union from the schema
// file name and a running counter, which is incremented here.
string CodeGen::unionName() {
    string base = schemaFile_;

    // Keep only the part from the last path separator onwards. The separator
    // itself is kept and turned into '_' by makeCanonical.
    const string::size_type sep = base.find_last_of("/\\");
    if (sep != string::npos) {
        base = base.substr(sep);
    }
    makeCanonical(base, false);

    const string ordinal = boost::lexical_cast<string>(unionNumber_++);
    return base + "_Union__" + ordinal + "__";
}
// Emits the out-of-class inline definitions of one union accessor pair:
// `get_<name>()`, which throws avro::Exception when the union currently holds
// a different branch, and `set_<name>()`, which selects branch `idx` and
// stores the value.
static void generateGetterAndSetter(ostream &os,
                                    const string &structName, const string &type, const string &name,
                                    size_t idx) {
    const string scope = " " + structName + "::";

    // Getter.
    os << "inline\n";
    os << type << scope << "get_" << name << "() const {\n";
    os << " if (idx_ != " << idx << ") {\n";
    os << " throw avro::Exception(\"Invalid type for "
       << "union\");\n";
    os << " }\n";
    os << " return " << ANY_NS << "::any_cast<" << type << " >(value_);\n";
    os << "}\n\n";

    // Setter.
    os << "inline\n";
    os << "void" << scope << "set_" << name;
    os << "(const " << type << "& v) {\n";
    os << " idx_ = " << idx << ";\n";
    os << " value_ = v;\n";
    os << "}\n\n";
}
// Emits the out-of-class inline default constructor of a union struct. It
// selects branch 0 and, when initMember is true, value-initialises value_
// with the given type.
static void generateConstructor(ostream &os,
                                const string &structName, bool initMember,
                                const string &type) {
    os << "inline " << structName << "::" << structName << "() : idx_(0)";
    if (!initMember) {
        os << " { }\n";
        return;
    }
    os << ", value_(" << type << "())";
    os << " { }\n";
}
/**
* Generates a type for union and emits the code.
* Since unions can encounter names that are not fully defined yet,
* such names must be declared and the inline functions deferred until all
* types are fully defined.
*
* Returns the name of the generated struct. If the union is already present in
* `done` once its branch types have been generated, the recorded name is
* returned and no struct is emitted.
*/
string CodeGen::generateUnionType(const NodePtr &n) {
size_t c = n->leaves();
vector<string> types;
vector<string> names;
auto it = doing.find(n);
if (it != doing.end()) {
// This union is already being generated further up the call stack
// (recursive schema): only declare the branch types.
for (size_t i = 0; i < c; ++i) {
const NodePtr &nn = n->leafAt(i);
types.push_back(generateDeclaration(nn));
names.push_back(cppNameOf(nn));
}
} else {
// First visit: mark the union as in progress while its branch types are
// fully generated.
doing.insert(n);
for (size_t i = 0; i < c; ++i) {
const NodePtr &nn = n->leafAt(i);
types.push_back(generateType(nn));
names.push_back(cppNameOf(nn));
}
doing.erase(n);
}
if (done.find(n) != done.end()) {
return done[n];
}
auto result = unionName();
os_ << "struct " << result << " {\n"
<< "private:\n"
<< " size_t idx_;\n"
<< " " << ANY_NS << "::any value_;\n"
<< "public:\n"
<< " size_t idx() const { return idx_; }\n";
for (size_t i = 0; i < c; ++i) {
const NodePtr &nn = n->leafAt(i);
if (nn->type() == avro::AVRO_NULL) {
// The null branch gets is_null/set_null, defined inline in the struct.
os_ << " bool is_null() const {\n"
<< " return (idx_ == " << i << ");\n"
<< " }\n"
<< " void set_null() {\n"
<< " idx_ = " << i << ";\n"
<< " value_ = " << ANY_NS << "::any();\n"
<< " }\n";
} else {
// Other branches are only declared here; their definitions are queued.
const string &type = types[i];
const string &name = names[i];
os_ << " " << type << " get_" << name << "() const;\n"
" void set_"
<< name << "(const " << type << "& v);\n";
pendingGettersAndSetters.emplace_back(result, type, name, i);
}
}
os_ << " " << result << "();\n";
// The default constructor initialises value_ with the first branch's type,
// unless that branch is null.
// NOTE(review): types[0] assumes the union has at least one branch - confirm
// that empty unions cannot reach this point.
pendingConstructors.emplace_back(result, types[0],
n->leafAt(0)->type() != avro::AVRO_NULL);
os_ << "};\n\n";
return result;
}
/**
 * Returns the type for the given schema node and emits code to os.
 * Symbolic nodes are resolved first. Results are cached in `done`, so each
 * node is generated at most once.
 */
string CodeGen::generateType(const NodePtr &n) {
    NodePtr target = n;
    if (n->type() == avro::AVRO_SYMBOLIC) {
        target = resolveSymbol(n);
    }

    auto cached = done.find(target);
    if (cached != done.end()) {
        return cached->second;
    }

    string typeName = doGenerateType(target);
    done[target] = typeName;
    return typeName;
}
// Generates (and emits, where needed) the C++ type for a node that is not
// symbolic and has not been generated yet. Returns the type spelling, or
// "$Undefined$" for node types that are not handled.
string CodeGen::doGenerateType(const NodePtr &n) {
    // Produces the element/value type of the container `n`. While `n` itself
    // is already being generated (recursive schema) the element is only
    // declared; otherwise it is fully generated with `n` marked in progress.
    auto containedType = [this, &n](const NodePtr &element) -> string {
        if (doing.find(n) != doing.end()) {
            return generateDeclaration(element);
        }
        doing.insert(n);
        string generated = generateType(element);
        doing.erase(n);
        return generated;
    };

    switch (n->type()) {
        case avro::AVRO_STRING:
        case avro::AVRO_BYTES:
        case avro::AVRO_INT:
        case avro::AVRO_LONG:
        case avro::AVRO_FLOAT:
        case avro::AVRO_DOUBLE:
        case avro::AVRO_BOOL:
        case avro::AVRO_NULL:
        case avro::AVRO_FIXED:
            return cppTypeOf(n);
        case avro::AVRO_ARRAY:
            return "std::vector<" + containedType(n->leafAt(0)) + " >";
        case avro::AVRO_MAP:
            // The value type of a map is taken from leaf 1.
            return "std::map<std::string, " + containedType(n->leafAt(1)) + " >";
        case avro::AVRO_RECORD:
            return generateRecordType(n);
        case avro::AVRO_ENUM:
            return generateEnumType(n);
        case avro::AVRO_UNION:
            return generateUnionType(n);
        default:
            break;
    }
    return "$Undefined$";
}
/**
 * Returns the C++ type name for a node without generating a full record
 * definition: records are only forward-declared ("struct X;") on the
 * output stream. Containers recurse into their element leaf; enums and
 * unions go through their regular generators.
 * Returns "$Undefined$" for any other node type.
 */
string CodeGen::generateDeclaration(const NodePtr &n) {
    const bool symbolic = (n->type() == avro::AVRO_SYMBOLIC);
    NodePtr resolved = symbolic ? resolveSymbol(n) : n;

    switch (resolved->type()) {
        case avro::AVRO_RECORD:
            // Forward declaration only; the full struct is emitted elsewhere.
            os_ << "struct " << cppTypeOf(resolved) << ";\n";
            return cppTypeOf(resolved);

        case avro::AVRO_ARRAY: {
            const string element = generateDeclaration(resolved->leafAt(0));
            return "std::vector<" + element + " >";
        }
        case avro::AVRO_MAP: {
            const string mapped = generateDeclaration(resolved->leafAt(1));
            return "std::map<std::string, " + mapped + " >";
        }

        case avro::AVRO_ENUM:
            return generateEnumType(resolved);
        case avro::AVRO_UNION:
            // FIXME: When can this happen?
            return generateUnionType(resolved);

        case avro::AVRO_STRING:
        case avro::AVRO_BYTES:
        case avro::AVRO_INT:
        case avro::AVRO_LONG:
        case avro::AVRO_FLOAT:
        case avro::AVRO_DOUBLE:
        case avro::AVRO_BOOL:
        case avro::AVRO_NULL:
        case avro::AVRO_FIXED:
            return cppTypeOf(resolved);

        default:
            break;
    }
    return "$Undefined$";
}
void CodeGen::generateEnumTraits(const NodePtr &n) {
string dname = decorate(n->name());
string fn = fullname(dname);
// the nameAt(i) does not take c++ reserved words into account
// so we need to call decorate on it
string last = decorate(n->nameAt(n->names() - 1));
os_ << "template<> struct codec_traits<" << fn << "> {\n"
<< " static void encode(Encoder& e, " << fn << " v) {\n"
<< " if (v > " << fn << "::" << last << ")\n"
<< " {\n"
<< " std::ostringstream error;\n"
<< R"( error << "enum value " << static_cast<unsigned>(v) << " is out of bound for )" << fn
<< " and cannot be encoded\";\n"
<< " throw avro::Exception(error.str());\n"
<< " }\n"
<< " e.encodeEnum(static_cast<size_t>(v));\n"
<< " }\n"
<< " static void decode(Decoder& d, " << fn << "& v) {\n"
<< " size_t index = d.decodeEnum();\n"
<< " if (index > static_cast<size_t>(" << fn << "::" << last << "))\n"
<< " {\n"
<< " std::ostringstream error;\n"
<< R"( error << "enum value " << index << " is out of bound for )" << fn
<< " and cannot be decoded\";\n"
<< " throw avro::Exception(error.str());\n"
<< " }\n"
<< " v = static_cast<" << fn << ">(index);\n"
<< " }\n"
<< "};\n\n";
}
void CodeGen::generateRecordTraits(const NodePtr &n) {
size_t c = n->leaves();
for (size_t i = 0; i < c; ++i) {
generateTraits(n->leafAt(i));
}
string fn = fullname(decorate(n->name()));
os_ << "template<> struct codec_traits<" << fn << "> {\n"
<< " static void encode(Encoder& e, const " << fn << "& v) {\n";
for (size_t i = 0; i < c; ++i) {
// the nameAt(i) does not take c++ reserved words into account
// so we need to call decorate on it
std::string decoratedNameAt = decorate(n->nameAt(i));
os_ << " avro::encode(e, v." << decoratedNameAt << ");\n";
}
os_ << " }\n"
<< " static void decode(Decoder& d, " << fn << "& v) {\n";
os_ << " if (avro::ResolvingDecoder *rd =\n";
os_ << " dynamic_cast<avro::ResolvingDecoder *>(&d)) {\n";
os_ << " const std::vector<size_t> fo = rd->fieldOrder();\n";
os_ << " for (std::vector<size_t>::const_iterator it = fo.begin();\n";
os_ << " it != fo.end(); ++it) {\n";
os_ << " switch (*it) {\n";
for (size_t i = 0; i < c; ++i) {
// the nameAt(i) does not take c++ reserved words into account
// so we need to call decorate on it
std::string decoratedNameAt = decorate(n->nameAt(i));
os_ << " case " << i << ":\n";
os_ << " avro::decode(d, v." << decoratedNameAt << ");\n";
os_ << " break;\n";
}
os_ << " default:\n";
os_ << " break;\n";
os_ << " }\n";
os_ << " }\n";
os_ << " } else {\n";
for (size_t i = 0; i < c; ++i) {
// the nameAt(i) does not take c++ reserved words into account
// so we need to call decorate on it
std::string decoratedNameAt = decorate(n->nameAt(i));
os_ << " avro::decode(d, v." << decoratedNameAt << ");\n";
}
os_ << " }\n";
os_ << " }\n"
<< "};\n\n";
}
void CodeGen::generateUnionTraits(const NodePtr &n) {
size_t c = n->leaves();
for (size_t i = 0; i < c; ++i) {
const NodePtr &nn = n->leafAt(i);
generateTraits(nn);
}
string name = done[n];
string fn = fullname(name);
os_ << "template<> struct codec_traits<" << fn << "> {\n"
<< " static void encode(Encoder& e, " << fn << " v) {\n"
<< " e.encodeUnionIndex(v.idx());\n"
<< " switch (v.idx()) {\n";
for (size_t i = 0; i < c; ++i) {
const NodePtr &nn = n->leafAt(i);
os_ << " case " << i << ":\n";
if (nn->type() == avro::AVRO_NULL) {
os_ << " e.encodeNull();\n";
} else {
os_ << " avro::encode(e, v.get_" << cppNameOf(nn)
<< "());\n";
}
os_ << " break;\n";
}
os_ << " }\n"
<< " }\n"
<< " static void decode(Decoder& d, " << fn << "& v) {\n"
<< " size_t n = d.decodeUnionIndex();\n"
<< " if (n >= " << c << ") { throw avro::Exception(\""
"Union index too big\"); }\n"
<< " switch (n) {\n";
for (size_t i = 0; i < c; ++i) {
const NodePtr &nn = n->leafAt(i);
os_ << " case " << i << ":\n";
if (nn->type() == avro::AVRO_NULL) {
os_ << " d.decodeNull();\n"
<< " v.set_null();\n";
} else {
os_ << " {\n"
<< " " << cppTypeOf(nn) << " vv;\n"
<< " avro::decode(d, vv);\n"
<< " v.set_" << cppNameOf(nn) << "(vv);\n"
<< " }\n";
}
os_ << " break;\n";
}
os_ << " }\n"
<< " }\n"
<< "};\n\n";
}
/**
 * Emits codec_traits for a node by dispatching on its type. Records,
 * enums and unions have dedicated generators; arrays and maps recurse
 * into their element leaf (leaf 0 and leaf 1 respectively). Every other
 * node type emits nothing.
 */
void CodeGen::generateTraits(const NodePtr &n) {
    const auto kind = n->type();

    if (kind == avro::AVRO_RECORD) {
        generateRecordTraits(n);
    } else if (kind == avro::AVRO_ENUM) {
        generateEnumTraits(n);
    } else if (kind == avro::AVRO_UNION) {
        generateUnionTraits(n);
    } else if (kind == avro::AVRO_ARRAY) {
        generateTraits(n->leafAt(0));
    } else if (kind == avro::AVRO_MAP) {
        generateTraits(n->leafAt(1));
    }
    // Primitives, fixed and anything else: nothing to emit.
}
/**
 * Writes the Apache License 2.0 header comment, followed by two blank
 * lines, to the output stream.
 */
void CodeGen::emitCopyright() {
    static const char *const licenseHeader =
        "/**\n"
        " * Licensed to the Apache Software Foundation (ASF) under one\n"
        " * or more contributor license agreements. See the NOTICE file\n"
        " * distributed with this work for additional information\n"
        " * regarding copyright ownership. The ASF licenses this file\n"
        " * to you under the Apache License, Version 2.0 (the\n"
        " * \"License\"); you may not use this file except in compliance\n"
        " * with the License. You may obtain a copy of the License at\n"
        " *\n"
        " * https://www.apache.org/licenses/LICENSE-2.0\n"
        " *\n"
        " * Unless required by applicable law or agreed to in writing, software\n"
        " * distributed under the License is distributed on an \"AS IS\" BASIS,\n"
        " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
        " * See the License for the specific language governing permissions and\n"
        " * limitations under the License.\n"
        " */\n\n\n";
    os_ << licenseHeader;
}
/**
 * Builds an include-guard macro name from the canonicalised header file
 * name, a value drawn from random_(), and the fixed suffix "__H_".
 */
string CodeGen::guard() {
    string canonical = headerFile_;
    makeCanonical(canonical, true);

    const string randomPart = lexical_cast<string>(random_());
    return canonical + "_" + randomPart + "__H_";
}
void CodeGen::generate(const ValidSchema &schema) {
emitCopyright();
string h = guardString_.empty() ? guard() : guardString_;
os_ << "#ifndef " << h << "\n";
os_ << "#define " << h << "\n\n\n";
os_ << "#include <sstream>\n"
#if __cplusplus >= 201703L
<< "#include <any>\n"
#else
<< "#include \"boost/any.hpp\"\n"
#endif
<< "#include \"" << includePrefix_ << "Specific.hh\"\n"
<< "#include \"" << includePrefix_ << "Encoder.hh\"\n"
<< "#include \"" << includePrefix_ << "Decoder.hh\"\n"
<< "\n";
vector<string> nsVector;
if (!ns_.empty()) {
boost::algorithm::split_regex(nsVector, ns_, boost::regex("::"));
for (vector<string>::const_iterator it =
nsVector.begin();
it != nsVector.end(); ++it) {
os_ << "namespace " << *it << " {\n";
}
inNamespace_ = true;
}
const NodePtr &root = schema.root();
generateType(root);
for (vector<PendingSetterGetter>::const_iterator it =
pendingGettersAndSetters.begin();
it != pendingGettersAndSetters.end(); ++it) {
generateGetterAndSetter(os_, it->structName, it->type, it->name,
it->idx);
}
for (vector<PendingConstructor>::const_iterator it =
pendingConstructors.begin();
it != pendingConstructors.end(); ++it) {
generateConstructor(os_, it->structName,
it->initMember, it->memberName);
}
if (!ns_.empty()) {
inNamespace_ = false;
for (vector<string>::const_iterator it =
nsVector.begin();
it != nsVector.end(); ++it) {
os_ << "}\n";
}
}
os_ << "namespace avro {\n";
unionNumber_ = 0;
generateTraits(root);
os_ << "}\n";
os_ << "#endif\n";
os_.flush();
}
// Short alias for the Boost.Program_options namespace; main() below uses it
// for its option description and parsed variables.
namespace po = boost::program_options;
/**
 * Scans a file for an include guard and returns the guarded macro name.
 *
 * A "#ifndef NAME" line starts a candidate. The scan stops as soon as a
 * following "#define NAME" line with the same name is seen. Any other
 * non-"#define" line drops the candidate. Lines are trimmed before being
 * inspected. Returns whatever candidate is held when the scan ends, which
 * is empty if none was found or the file could not be read.
 */
static string readGuard(const string &filename) {
    std::ifstream input(filename.c_str());
    string guardName;

    for (string line; std::getline(input, line);) {
        boost::algorithm::trim(line);

        if (guardName.empty()) {
            // Still looking for the opening "#ifndef ".
            if (boost::algorithm::starts_with(line, "#ifndef ")) {
                guardName = line.substr(8);
            }
            continue;
        }

        if (!boost::algorithm::starts_with(line, "#define ")) {
            guardName.erase();
            continue;
        }

        if (line.substr(8) == guardName) {
            break;
        }
    }
    return guardName;
}
/**
 * Entry point of the code generator.
 *
 * Parses the command line, compiles the JSON schema named by --input and
 * writes the generated C++ header to --output. When the output file
 * already exists, its include guard is read first and passed to CodeGen.
 *
 * Returns 0 on success and 1 on any failure: bad or missing options, help
 * requested, unreadable input, unwritable output, or a schema that fails
 * to parse or compile.
 */
int main(int argc, char **argv) {
    const string NS("namespace");
    const string OUT_FILE("output");
    const string IN_FILE("input");
    const string INCLUDE_PREFIX("include-prefix");
    const string NO_UNION_TYPEDEF("no-union-typedef");

    po::options_description desc("Allowed options");
    desc.add_options()("help,h", "produce help message")("include-prefix,p", po::value<string>()->default_value("avro"),
                                                         "prefix for include headers, - for none, default: avro")("no-union-typedef,U", "do not generate typedefs for unions in records")("namespace,n", po::value<string>(), "set namespace for generated code")("input,i", po::value<string>(), "input file")("output,o", po::value<string>(), "output file to generate");

    po::variables_map vm;
    try {
        // Parsing throws on unknown options or missing values; report the
        // problem with the usage text instead of terminating uncaught.
        po::store(po::parse_command_line(argc, argv, desc), vm);
        po::notify(vm);
    } catch (const std::exception &e) {
        std::cerr << "Invalid command line: " << e.what() << std::endl;
        std::cerr << desc << std::endl;
        return 1;
    }

    if (vm.count("help") || vm.count(IN_FILE) == 0 || vm.count(OUT_FILE) == 0) {
        std::cout << desc << std::endl;
        return 1;
    }

    string ns = vm.count(NS) > 0 ? vm[NS].as<string>() : string();
    string outf = vm.count(OUT_FILE) > 0 ? vm[OUT_FILE].as<string>() : string();
    string inf = vm.count(IN_FILE) > 0 ? vm[IN_FILE].as<string>() : string();
    string incPrefix = vm[INCLUDE_PREFIX].as<string>();
    bool noUnion = vm.count(NO_UNION_TYPEDEF) != 0;

    if (incPrefix == "-") {
        incPrefix.clear();
    } else if (!incPrefix.empty() && incPrefix.back() != '/') {
        // The emptiness check matters: reading the last character of an
        // empty string is undefined behaviour. An empty prefix is treated
        // like "-", i.e. no prefix.
        incPrefix += "/";
    }

    try {
        ValidSchema schema;

        if (!inf.empty()) {
            ifstream in(inf.c_str());
            if (!in) {
                std::cerr << "Failed to open input file: " << inf << std::endl;
                return 1;
            }
            compileJsonSchema(in, schema);
        } else {
            compileJsonSchema(std::cin, schema);
        }

        if (!outf.empty()) {
            // Read the existing guard before the output stream is opened
            // on the same path.
            string g = readGuard(outf);
            ofstream out(outf.c_str());
            if (!out) {
                std::cerr << "Failed to open output file: " << outf << std::endl;
                return 1;
            }
            CodeGen(out, ns, inf, outf, g, incPrefix, noUnion).generate(schema);
            if (!out) {
                // generate() flushes; a bad stream here means a write failed.
                std::cerr << "Failed to write output file: " << outf << std::endl;
                return 1;
            }
        } else {
            CodeGen(std::cout, ns, inf, outf, "", incPrefix, noUnion).generate(schema);
        }
        return 0;
    } catch (const std::exception &e) {
        std::cerr << "Failed to parse or compile schema: "
                  << e.what() << std::endl;
        return 1;
    }
}

View File

@ -1,185 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "JsonDom.hh"
#include <stdexcept>
#include <cstring>
#include "JsonIO.hh"
#include "Stream.hh"
using boost::format;
using std::string;
namespace avro {
namespace json {
/**
 * Returns a lower-case name for an EntityType, or "unknown" when the
 * value is not one of the enumerators.
 */
const char *typeToString(EntityType t) {
    const char *label = "unknown";
    switch (t) {
        case EntityType::Null:
            label = "null";
            break;
        case EntityType::Bool:
            label = "bool";
            break;
        case EntityType::Long:
            label = "long";
            break;
        case EntityType::Double:
            label = "double";
            break;
        case EntityType::String:
            label = "string";
            break;
        case EntityType::Arr:
            label = "array";
            break;
        case EntityType::Obj:
            label = "object";
            break;
        default:
            break;
    }
    return label;
}
/**
 * Reads one JSON value from the parser and returns it as an Entity.
 *
 * Scalars are consumed and wrapped together with the parser's current
 * line. Arrays and objects are read recursively and carry the line on
 * which they started. Throws std::domain_error, with the token name as
 * message, when the next token cannot start a value.
 */
Entity readEntity(JsonParser &p) {
    const JsonParser::Token upcoming = p.peek();

    if (upcoming == JsonParser::Token::Null) {
        p.advance();
        return Entity(p.line());
    }
    if (upcoming == JsonParser::Token::Bool) {
        p.advance();
        return Entity(p.boolValue(), p.line());
    }
    if (upcoming == JsonParser::Token::Long) {
        p.advance();
        return Entity(p.longValue(), p.line());
    }
    if (upcoming == JsonParser::Token::Double) {
        p.advance();
        return Entity(p.doubleValue(), p.line());
    }
    if (upcoming == JsonParser::Token::String) {
        p.advance();
        // The raw (still escaped) text is stored.
        return Entity(std::make_shared<String>(p.rawString()), p.line());
    }
    if (upcoming == JsonParser::Token::ArrayStart) {
        const size_t startLine = p.line();
        p.advance();
        std::shared_ptr<Array> elements = std::make_shared<Array>();
        while (p.peek() != JsonParser::Token::ArrayEnd) {
            elements->push_back(readEntity(p));
        }
        p.advance(); // consume the closing bracket
        return Entity(elements, startLine);
    }
    if (upcoming == JsonParser::Token::ObjectStart) {
        const size_t startLine = p.line();
        p.advance();
        std::shared_ptr<Object> fields = std::make_shared<Object>();
        while (p.peek() != JsonParser::Token::ObjectEnd) {
            p.advance(); // the key token
            std::string key = p.stringValue();
            Entity value = readEntity(p);
            fields->insert(Object::value_type(key, value));
        }
        p.advance(); // consume the closing brace
        return Entity(fields, startLine);
    }

    throw std::domain_error(JsonParser::toString(upcoming));
}
/**
 * Parses a NUL-terminated JSON text by forwarding its bytes and length to
 * the (pointer, length) overload.
 */
Entity loadEntity(const char *text) {
    const size_t length = ::strlen(text);
    const uint8_t *bytes = reinterpret_cast<const uint8_t *>(text);
    return loadEntity(bytes, length);
}
/**
 * Parses one JSON value from the given input stream using a fresh
 * JsonParser.
 */
Entity loadEntity(InputStream &in) {
    JsonParser parser;
    parser.init(in);
    return readEntity(parser);
}
/**
 * Parses one JSON value from a byte buffer by wrapping it in a memory
 * input stream.
 */
Entity loadEntity(const uint8_t *text, size_t len) {
    std::unique_ptr<InputStream> source = memoryInputStream(text, len);
    InputStream &stream = *source;
    return loadEntity(stream);
}
/**
 * Writes an Entity through the generator, recursing into arrays and
 * objects. An entity whose type matches none of the enumerators writes
 * nothing.
 */
void writeEntity(JsonGenerator<JsonNullFormatter> &g, const Entity &n) {
    const EntityType kind = n.type();

    if (kind == EntityType::Null) {
        g.encodeNull();
    } else if (kind == EntityType::Bool) {
        g.encodeBool(n.boolValue());
    } else if (kind == EntityType::Long) {
        g.encodeNumber(n.longValue());
    } else if (kind == EntityType::Double) {
        g.encodeNumber(n.doubleValue());
    } else if (kind == EntityType::String) {
        g.encodeString(n.stringValue());
    } else if (kind == EntityType::Arr) {
        g.arrayStart();
        const Array &elements = n.arrayValue();
        for (const Entity &element : elements) {
            writeEntity(g, element);
        }
        g.arrayEnd();
    } else if (kind == EntityType::Obj) {
        g.objectStart();
        const Object &fields = n.objectValue();
        for (const Object::value_type &field : fields) {
            g.encodeString(field.first);
            writeEntity(g, field.second);
        }
        g.objectEnd();
    }
}
/**
 * Throws Exception when this entity's type differs from the expected
 * one; the message names both the expected and the actual type.
 */
void Entity::ensureType(EntityType type) const {
    if (type_ == type) {
        return;
    }
    format msg("Invalid type. Expected \"%1%\" actual %2%");
    msg % typeToString(type) % typeToString(type_);
    throw Exception(msg);
}
/**
 * Returns the stored string after passing it through
 * JsonParser::toStringValue(). Throws if the entity is not a string.
 */
String Entity::stringValue() const {
    ensureType(EntityType::String);
    const std::shared_ptr<String> *holder =
        boost::any_cast<std::shared_ptr<String>>(&value_);
    return JsonParser::toStringValue(**holder);
}
/**
 * Returns the stored string after passing it through
 * JsonParser::toBytesValue(). Throws if the entity is not a string.
 */
String Entity::bytesValue() const {
    ensureType(EntityType::String);
    const std::shared_ptr<String> *holder =
        boost::any_cast<std::shared_ptr<String>>(&value_);
    return JsonParser::toBytesValue(**holder);
}
std::string Entity::toString() const {
std::unique_ptr<OutputStream> out = memoryOutputStream();
JsonGenerator<JsonNullFormatter> g;
g.init(*out);
writeEntity(g, *this);
g.flush();
std::unique_ptr<InputStream> in = memoryInputStream(*out);
const uint8_t *p = nullptr;
size_t n = 0;
size_t c = 0;
while (in->next(&p, &n)) {
c += n;
}
std::string result;
result.resize(c);
c = 0;
std::unique_ptr<InputStream> in2 = memoryInputStream(*out);
while (in2->next(&p, &n)) {
::memcpy(&result[c], p, n);
c += n;
}
return result;
}
} // namespace json
} // namespace avro

View File

@ -1,183 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_json_JsonDom_hh__
#define avro_json_JsonDom_hh__
#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "Config.hh"
#include "boost/any.hpp"
namespace avro {
class AVRO_DECL InputStream;
namespace json {
// Forward declaration so the container typedefs below can refer to Entity.
class Entity;
// C++ types used to represent each kind of JSON value.
typedef bool Bool;
typedef int64_t Long;
typedef double Double;
typedef std::string String;
typedef std::vector<Entity> Array;
typedef std::map<std::string, Entity> Object;
// Forward declarations of the parser, formatter and generator types.
class AVRO_DECL JsonParser;
class JsonNullFormatter;
// The generator uses JsonNullFormatter unless another formatter is named.
template<typename F = JsonNullFormatter>
class AVRO_DECL JsonGenerator;
// Tag identifying which kind of JSON value an Entity holds.
enum class EntityType {
Null,
Bool,
Long,
Double,
String,
Arr,
Obj
};
// Returns a printable name for an EntityType.
const char *typeToString(EntityType t);
/// Streams the printable name of an EntityType, as given by typeToString().
inline std::ostream &operator<<(std::ostream &os, EntityType et) {
    os << typeToString(et);
    return os;
}
/**
 * One JSON value together with the source line it was read from.
 *
 * The payload lives in a boost::any. Bool, Long and Double are stored by
 * value; strings, arrays and objects are stored as std::shared_ptr to
 * the underlying container. Every typed accessor checks the type tag
 * first via ensureType().
 */
class AVRO_DECL Entity {
    EntityType type_;
    boost::any value_;
    size_t line_; // can't be const else noncopyable...

    void ensureType(EntityType) const;

public:
    /// Creates a Null entity.
    explicit Entity(size_t line = 0)
        : type_(EntityType::Null), line_(line) {}

    // The constructors below are deliberately not explicit: implicit
    // conversion from the payload type is wanted.

    // NOLINTNEXTLINE(google-explicit-constructor)
    Entity(Bool v, size_t line = 0)
        : type_(EntityType::Bool), value_(v), line_(line) {}

    // NOLINTNEXTLINE(google-explicit-constructor)
    Entity(Long v, size_t line = 0)
        : type_(EntityType::Long), value_(v), line_(line) {}

    // NOLINTNEXTLINE(google-explicit-constructor)
    Entity(Double v, size_t line = 0)
        : type_(EntityType::Double), value_(v), line_(line) {}

    // NOLINTNEXTLINE(google-explicit-constructor)
    Entity(const std::shared_ptr<String> &v, size_t line = 0)
        : type_(EntityType::String), value_(v), line_(line) {}

    // NOLINTNEXTLINE(google-explicit-constructor)
    Entity(const std::shared_ptr<Array> &v, size_t line = 0)
        : type_(EntityType::Arr), value_(v), line_(line) {}

    // NOLINTNEXTLINE(google-explicit-constructor)
    Entity(const std::shared_ptr<Object> &v, size_t line = 0)
        : type_(EntityType::Obj), value_(v), line_(line) {}

    /// Kind of value held.
    EntityType type() const {
        return type_;
    }

    /// Line number recorded at construction.
    size_t line() const {
        return line_;
    }

    Bool boolValue() const {
        ensureType(EntityType::Bool);
        const Bool stored = boost::any_cast<Bool>(value_);
        return stored;
    }

    Long longValue() const {
        ensureType(EntityType::Long);
        const Long stored = boost::any_cast<Long>(value_);
        return stored;
    }

    Double doubleValue() const {
        ensureType(EntityType::Double);
        const Double stored = boost::any_cast<Double>(value_);
        return stored;
    }

    String stringValue() const;

    String bytesValue() const;

    /// Reference to the stored array; valid while the shared array lives.
    const Array &arrayValue() const {
        ensureType(EntityType::Arr);
        const std::shared_ptr<Array> *holder =
            boost::any_cast<std::shared_ptr<Array>>(&value_);
        return **holder;
    }

    /// Reference to the stored object; valid while the shared object lives.
    const Object &objectValue() const {
        ensureType(EntityType::Obj);
        const std::shared_ptr<Object> *holder =
            boost::any_cast<std::shared_ptr<Object>>(&value_);
        return **holder;
    }

    std::string toString() const;
};
template<typename T>
struct type_traits {
};
template<>
struct type_traits<bool> {
static EntityType type() { return EntityType::Bool; }
static const char *name() { return "bool"; }
};
template<>
struct type_traits<int64_t> {
static EntityType type() { return EntityType::Long; }
static const char *name() { return "long"; }
};
template<>
struct type_traits<double> {
static EntityType type() { return EntityType::Double; }
static const char *name() { return "double"; }
};
template<>
struct type_traits<std::string> {
static EntityType type() { return EntityType::String; }
static const char *name() { return "string"; }
};
template<>
struct type_traits<std::vector<Entity>> {
static EntityType type() { return EntityType::Arr; }
static const char *name() { return "array"; }
};
template<>
struct type_traits<std::map<std::string, Entity>> {
static EntityType type() { return EntityType::Obj; }
static const char *name() { return "object"; }
};
// Reads one JSON value from an already-initialised parser.
AVRO_DECL Entity readEntity(JsonParser &p);
// Parses one JSON value from an input stream.
AVRO_DECL Entity loadEntity(InputStream &in);
// Parses one JSON value from a NUL-terminated text.
AVRO_DECL Entity loadEntity(const char *text);
// Parses one JSON value from a buffer of len bytes.
AVRO_DECL Entity loadEntity(const uint8_t *text, size_t len);
// Writes an entity, including nested arrays and objects, through a generator.
void writeEntity(JsonGenerator<JsonNullFormatter> &g, const Entity &n);
} // namespace json
} // namespace avro
#endif

View File

@ -1,426 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "JsonIO.hh"
namespace avro {
namespace json {
using std::ostringstream;
using std::string;
// Printable names of the parser tokens. JsonParser::toString() indexes this
// table with the numeric value of a Token, so the entries must stay in the
// same order as the Token enumerators. Note that Token::Long is reported
// as "Integer".
const char *const
JsonParser::tokenNames[] = {
"Null",
"Bool",
"Integer",
"Double",
"String",
"Array start",
"Array end",
"Object start",
"Object end",
};
/**
 * Returns the next non-whitespace character of the input.
 *
 * Starts from the pushed-back character when there is one (hasNext),
 * otherwise from a synthetic space so that the loop reads from the
 * stream. Each newline skipped increments the line counter. The
 * push-back flag is cleared on return.
 */
char JsonParser::next() {
    char ch = hasNext ? nextChar : ' ';
    // The <cctype> classifiers require a value representable as unsigned
    // char (or EOF); passing a negative char is undefined behaviour, so
    // the character is converted first.
    while (isspace(static_cast<unsigned char>(ch))) {
        if (ch == '\n') {
            line_++;
        }
        ch = in_.read();
    }
    hasNext = false;
    return ch;
}
/**
 * Advances to the next token and verifies that it is the expected one.
 *
 * When a Double is expected, two substitutions are accepted: the strings
 * "Infinity", "-Infinity" and "NaN" become the corresponding double (and
 * the current token is changed to Double), and a Long has its value
 * copied into the double value. Anything else throws Exception naming
 * the expected and the found token.
 */
void JsonParser::expectToken(Token tk) {
    if (advance() == tk) {
        return;
    }

    if (tk == Token::Double) {
        const bool specialString = cur() == Token::String
            && (sv == "Infinity" || sv == "-Infinity" || sv == "NaN");
        if (specialString) {
            curToken = Token::Double;
            if (sv == "Infinity") {
                dv = std::numeric_limits<double>::infinity();
            } else if (sv == "-Infinity") {
                dv = -std::numeric_limits<double>::infinity();
            } else {
                dv = std::numeric_limits<double>::quiet_NaN();
            }
            return;
        }
        if (cur() == Token::Long) {
            dv = double(lv);
            return;
        }
    }

    ostringstream message;
    message << "Incorrect token in the stream. Expected: "
            << JsonParser::toString(tk) << ", found "
            << JsonParser::toString(cur());
    throw Exception(message.str());
}
/**
 * Reads the next token from the input and updates the parser state.
 *
 * Structural characters are handled first: ']' and '}' close the current
 * container and restore the state saved on the stack; ',' and ':' are
 * validated against the current state and then skipped. After that the
 * character decides which kind of value follows.
 *
 * Throws the Exception built by unexpected() for any character that is
 * not allowed in the current state.
 */
JsonParser::Token JsonParser::doAdvance() {
    char ch = next();
    if (ch == ']') {
        if (curState == stArray0 || curState == stArrayN) {
            curState = stateStack.top();
            stateStack.pop();
            return Token::ArrayEnd;
        } else {
            throw unexpected(ch);
        }
    } else if (ch == '}') {
        if (curState == stObject0 || curState == stObjectN) {
            curState = stateStack.top();
            stateStack.pop();
            return Token::ObjectEnd;
        } else {
            throw unexpected(ch);
        }
    } else if (ch == ',') {
        if (curState != stObjectN && curState != stArrayN) {
            throw unexpected(ch);
        }
        if (curState == stObjectN) {
            // After a comma inside an object the next token must be a key.
            curState = stObject0;
        }
        ch = next();
    } else if (ch == ':') {
        if (curState != stKey) {
            throw unexpected(ch);
        }
        curState = stObjectN;
        ch = next();
    }

    if (curState == stObject0) {
        // Object keys must be strings.
        if (ch != '"') {
            throw unexpected(ch);
        }
        curState = stKey;
    } else if (curState == stArray0) {
        curState = stArrayN;
    }

    switch (ch) {
        case '[':
            stateStack.push(curState);
            curState = stArray0;
            return Token::ArrayStart;
        case '{':
            stateStack.push(curState);
            curState = stObject0;
            return Token::ObjectStart;
        case '"':
            return tryString();
        case 't':
            bv = true;
            return tryLiteral("rue", 3, Token::Bool);
        case 'f':
            bv = false;
            return tryLiteral("alse", 4, Token::Bool);
        case 'n':
            return tryLiteral("ull", 3, Token::Null);
        default:
            // isdigit() is undefined for negative char values, so convert
            // to unsigned char before classifying.
            if (isdigit(static_cast<unsigned char>(ch)) || ch == '-') {
                return tryNumber(ch);
            } else {
                throw unexpected(ch);
            }
    }
}
/**
 * Scans a JSON number whose first character has already been read.
 *
 * The scanner is a small state machine:
 *   0  after a leading '-'            (needs a digit)
 *   1  after a leading '0'            (accepting, integer)
 *   2  inside the integer digits      (accepting, integer)
 *   3  after '.'                      (needs a digit)
 *   4  inside the fraction digits     (accepting, double)
 *   5  after 'e' / 'E'                (needs a sign or a digit)
 *   6  after the exponent sign        (needs a digit)
 *   7  inside the exponent digits     (accepting, double)
 *
 * The first character that does not belong to the number is kept in
 * nextChar/hasNext for the next call to next(). Returns Token::Long or
 * Token::Double with lv or dv set from the collected text. Throws when
 * the text ends in a non-accepting state.
 */
JsonParser::Token JsonParser::tryNumber(char ch) {
    // isdigit() is undefined for negative char values; classify through
    // unsigned char instead.
    auto isDigit = [](char c) {
        return isdigit(static_cast<unsigned char>(c)) != 0;
    };

    sv.clear();
    sv.push_back(ch);

    hasNext = false;
    int state = (ch == '-') ? 0 : (ch == '0') ? 1 : 2;
    for (;;) {
        switch (state) {
            case 0:
                if (in_.hasMore()) {
                    ch = in_.read();
                    if (isDigit(ch)) {
                        state = (ch == '0') ? 1 : 2;
                        sv.push_back(ch);
                        continue;
                    }
                    hasNext = true;
                }
                break;
            case 1:
                if (in_.hasMore()) {
                    ch = in_.read();
                    if (ch == '.') {
                        state = 3;
                        sv.push_back(ch);
                        continue;
                    } else if (ch == 'e' || ch == 'E') {
                        sv.push_back(ch);
                        state = 5;
                        continue;
                    }
                    hasNext = true;
                }
                break;
            case 2:
                if (in_.hasMore()) {
                    ch = in_.read();
                    if (isDigit(ch)) {
                        sv.push_back(ch);
                        continue;
                    } else if (ch == '.') {
                        state = 3;
                        sv.push_back(ch);
                        continue;
                    } else if (ch == 'e' || ch == 'E') {
                        sv.push_back(ch);
                        state = 5;
                        continue;
                    }
                    hasNext = true;
                }
                break;
            case 3:
            case 6:
                if (in_.hasMore()) {
                    ch = in_.read();
                    if (isDigit(ch)) {
                        sv.push_back(ch);
                        // 3 -> 4 (fraction digits), 6 -> 7 (exponent digits)
                        state++;
                        continue;
                    }
                    hasNext = true;
                }
                break;
            case 4:
                if (in_.hasMore()) {
                    ch = in_.read();
                    if (isDigit(ch)) {
                        sv.push_back(ch);
                        continue;
                    } else if (ch == 'e' || ch == 'E') {
                        sv.push_back(ch);
                        state = 5;
                        continue;
                    }
                    hasNext = true;
                }
                break;
            case 5:
                if (in_.hasMore()) {
                    ch = in_.read();
                    if (ch == '+' || ch == '-') {
                        sv.push_back(ch);
                        state = 6;
                        continue;
                    } else if (isDigit(ch)) {
                        sv.push_back(ch);
                        state = 7;
                        continue;
                    }
                    hasNext = true;
                }
                break;
            case 7:
                if (in_.hasMore()) {
                    ch = in_.read();
                    if (isDigit(ch)) {
                        sv.push_back(ch);
                        continue;
                    }
                    hasNext = true;
                }
                break;
            default:
                throw Exception("Unexpected JSON parse state");
        }

        // Reached only when the number has ended (by a foreign character
        // or by end of input).
        if (state == 1 || state == 2 || state == 4 || state == 7) {
            if (hasNext) {
                nextChar = ch;
            }
            std::istringstream iss(sv);
            if (state == 1 || state == 2) {
                iss >> lv;
                return Token::Long;
            } else {
                iss >> dv;
                return Token::Double;
            }
        } else {
            if (hasNext) {
                throw unexpected(ch);
            } else {
                throw Exception("Unexpected EOF");
            }
        }
    }
}
/**
 * Scans a JSON string whose opening quote has already been consumed and
 * stores its raw text in sv.
 *
 * Escape sequences are validated but kept in escaped form (backslash
 * included); decoding happens later in decodeString(). A \u or \U escape
 * must be followed by exactly four hexadecimal digits.
 *
 * Returns Token::String at the closing quote. Throws the Exception built
 * by unexpected() for an unknown escape or a non-hex digit.
 */
JsonParser::Token JsonParser::tryString() {
    sv.clear();
    for (;;) {
        char ch = in_.read();
        if (ch == '"') {
            return Token::String;
        } else if (ch == '\\') {
            ch = in_.read();
            switch (ch) {
                case '"':
                case '\\':
                case '/':
                case 'b':
                case 'f':
                case 'n':
                case 'r':
                case 't':
                    sv.push_back('\\');
                    sv.push_back(ch);
                    break;
                case 'u':
                case 'U': {
                    char e[4];
                    in_.readBytes(reinterpret_cast<uint8_t *>(e), 4);
                    sv.push_back('\\');
                    sv.push_back(ch);
                    for (char c : e) {
                        // isdigit() is undefined for negative char values,
                        // so classify through unsigned char.
                        if (isdigit(static_cast<unsigned char>(c))
                            || (c >= 'a' && c <= 'f')
                            || (c >= 'A' && c <= 'F')) {
                            sv.push_back(c);
                        } else {
                            throw unexpected(c);
                        }
                    }
                } break;
                default:
                    throw unexpected(ch);
            }
        } else {
            sv.push_back(ch);
        }
    }
}
/**
 * Decodes the escape sequences of a raw JSON string.
 *
 * @param s      raw string text, escapes still in place
 * @param binary when true, each \uXXXX escape must be at most 0xff and is
 *               emitted as a single byte; when false, it is emitted as the
 *               UTF-8 encoding of the code point
 * @return the decoded text
 * @throws Exception on an unknown escape, on an escape cut short by the
 *         end of the string, on a binary escape above 0xff, or on a code
 *         point outside the Unicode range
 *
 * NOTE(review): each \uXXXX escape is decoded on its own; UTF-16
 * surrogate pairs are not combined into one code point.
 */
string JsonParser::decodeString(const string &s, bool binary) {
    string result;
    for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
        char ch = *it;
        if (ch != '\\') {
            result.push_back(ch);
            continue;
        }

        // A backslash must be followed by at least one more character;
        // advancing past end() would be undefined behaviour.
        if (++it == s.end()) {
            throw Exception("Unexpected end of JSON string after escape character");
        }
        ch = *it;
        switch (ch) {
            case '"':
            case '\\':
            case '/':
                result.push_back(ch);
                continue;
            case 'b':
                result.push_back('\b');
                continue;
            case 'f':
                result.push_back('\f');
                continue;
            case 'n':
                result.push_back('\n');
                continue;
            case 'r':
                result.push_back('\r');
                continue;
            case 't':
                result.push_back('\t');
                continue;
            case 'u':
            case 'U': {
                uint32_t n = 0;
                char e[4];
                for (char &i : e) {
                    if (++it == s.end()) {
                        throw Exception("Truncated unicode escape in JSON string");
                    }
                    n *= 16;
                    char c = *it;
                    i = c;
                    // isdigit() is undefined for negative char values.
                    if (isdigit(static_cast<unsigned char>(c))) {
                        n += c - '0';
                    } else if (c >= 'a' && c <= 'f') {
                        n += c - 'a' + 10;
                    } else if (c >= 'A' && c <= 'F') {
                        n += c - 'A' + 10;
                    }
                }
                if (binary) {
                    if (n > 0xff) {
                        throw Exception(boost::format(
                                            "Invalid byte for binary: %1%%2%")
                                        % ch % string(e, 4));
                    } else {
                        result.push_back(static_cast<char>(n));
                        continue;
                    }
                }
                // UTF-8 encode the code point.
                if (n < 0x80) {
                    result.push_back(static_cast<char>(n));
                } else if (n < 0x800) {
                    result.push_back(static_cast<char>((n >> 6) | 0xc0));
                    result.push_back(static_cast<char>((n & 0x3f) | 0x80));
                } else if (n < 0x10000) {
                    result.push_back(static_cast<char>((n >> 12) | 0xe0));
                    result.push_back(static_cast<char>(((n >> 6) & 0x3f) | 0x80));
                    result.push_back(static_cast<char>((n & 0x3f) | 0x80));
                } else if (n < 0x110000) {
                    // Upper bound is U+10FFFF; the limit is hexadecimal.
                    result.push_back(static_cast<char>((n >> 18) | 0xf0));
                    result.push_back(static_cast<char>(((n >> 12) & 0x3f) | 0x80));
                    result.push_back(static_cast<char>(((n >> 6) & 0x3f) | 0x80));
                    result.push_back(static_cast<char>((n & 0x3f) | 0x80));
                } else {
                    throw Exception(boost::format(
                                        "Invalid unicode value: %1%i%2%")
                                    % ch % string(e, 4));
                }
            }
                continue;
            default:
                throw Exception("Unexpected JSON parse state");
        }
    }
    return result;
}
/**
 * Builds (but does not throw) the Exception reporting an unexpected
 * character; the character is shown as two hexadecimal digits.
 */
Exception JsonParser::unexpected(unsigned char c) {
    const char highNibble = toHex(c / 16);
    const char lowNibble = toHex(c % 16);

    std::ostringstream message;
    message << "Unexpected character in json " << highNibble << lowNibble;
    return Exception(message.str());
}
/**
 * Matches the remainder of a literal (true / false / null) whose first
 * character has already been consumed.
 *
 * @param exp the expected remaining characters
 * @param n   number of characters in exp
 * @param tk  token to return on success
 *
 * The character following the literal, if any, is read and kept in
 * nextChar/hasNext; it must not be a letter or a digit. Throws the
 * Exception built by unexpected() on any mismatch.
 */
JsonParser::Token JsonParser::tryLiteral(const char exp[], size_t n, Token tk) {
    char c[100];
    in_.readBytes(reinterpret_cast<uint8_t *>(c), n);
    for (size_t i = 0; i < n; ++i) {
        if (c[i] != exp[i]) {
            throw unexpected(c[i]);
        }
    }
    if (in_.hasMore()) {
        nextChar = in_.read();
        // The <cctype> classifiers are undefined for negative char
        // values, so classify through unsigned char.
        const unsigned char following = static_cast<unsigned char>(nextChar);
        if (isdigit(following) || isalpha(following)) {
            throw unexpected(nextChar);
        }
        hasNext = true;
    }
    return tk;
}
} // namespace json
} // namespace avro

View File

@ -1,481 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_json_JsonIO_hh__
#define avro_json_JsonIO_hh__
#include <boost/lexical_cast.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/utility.hpp>
#include <locale>
#include <sstream>
#include <stack>
#include <string>
#include "Config.hh"
#include "Stream.hh"
namespace avro {
namespace json {
/// Returns the lower-case hexadecimal digit for a value in the range 0..15.
inline char toHex(unsigned int n) {
    if (n < 10) {
        return static_cast<char>(n + '0');
    }
    return static_cast<char>(n + 'a' - 10);
}
/**
 * Token-at-a-time JSON parser reading from a StreamReader.
 *
 * advance() moves to the next token and peek() looks at it without
 * consuming it. The value belonging to the current token is available
 * through boolValue(), longValue(), doubleValue() and the string
 * accessors. The parser also tracks the current line number.
 */
class AVRO_DECL JsonParser : boost::noncopyable {
public:
    /// Kinds of token the parser reports.
    enum class Token {
        Null,
        Bool,
        Long,
        Double,
        String,
        ArrayStart,
        ArrayEnd,
        ObjectStart,
        ObjectEnd
    };

    /// Current line number; starts at 1.
    size_t line() const {
        return line_;
    }

private:
    enum State {
        stValue,   // Expect a data type
        stArray0,  // Expect a data type or ']'
        stArrayN,  // Expect a ',' or ']'
        stObject0, // Expect a string or a '}'
        stObjectN, // Expect a ',' or '}'
        stKey      // Expect a ':'
    };

    std::stack<State> stateStack; // states of the enclosing containers
    State curState;
    bool hasNext;  // true when nextChar holds a pushed-back character
    char nextChar;
    bool peeked;   // true when curToken was produced by peek()
    StreamReader in_;
    Token curToken;
    bool bv;        // value of the last Bool token
    int64_t lv;     // value of the last Long token
    double dv;      // value of the last Double token
    std::string sv; // raw text of the last String or number token
    size_t line_;

    Token doAdvance();
    Token tryLiteral(const char exp[], size_t n, Token tk);
    Token tryNumber(char ch);
    Token tryString();
    static Exception unexpected(unsigned char ch);
    char next();

    static std::string decodeString(const std::string &s, bool binary);

public:
    JsonParser()
        : curState(stValue), hasNext(false), nextChar(0), peeked(false),
          curToken(Token::Null), bv(false), lv(0), dv(0), line_(1) {}

    /// Resets the parser state and attaches it to a new input stream.
    void init(InputStream &is) {
        // Clear by swapping with an empty stack
        std::stack<State> emptyStack;
        emptyStack.swap(stateStack);
        curState = stValue;
        hasNext = false;
        peeked = false;
        line_ = 1;
        in_.reset(is);
    }

    /// Consumes and returns the next token (the peeked one, if any).
    Token advance() {
        if (peeked) {
            peeked = false;
            return curToken;
        }
        curToken = doAdvance();
        return curToken;
    }

    /// Returns the next token without consuming it.
    Token peek() {
        if (peeked) {
            return curToken;
        }
        curToken = doAdvance();
        peeked = true;
        return curToken;
    }

    void expectToken(Token tk);

    bool boolValue() const {
        return bv;
    }

    Token cur() const {
        return curToken;
    }

    double doubleValue() const {
        return dv;
    }

    int64_t longValue() const {
        return lv;
    }

    /// Text of the current string token with escapes left in place.
    const std::string &rawString() const {
        return sv;
    }

    std::string stringValue() const {
        return decodeString(sv, false);
    }

    std::string bytesValue() const {
        return decodeString(sv, true);
    }

    /// Drains the input; only valid at top level with no token peeked.
    void drain() {
        if (peeked || !stateStack.empty()) {
            throw Exception("Invalid state for draining");
        }
        in_.drain(hasNext);
        hasNext = false;
    }

    /**
     * Return UTF-8 encoded string value.
     */
    static std::string toStringValue(const std::string &sv) {
        return decodeString(sv, false);
    }

    /**
     * Return byte-encoded string value. It is an error if the input
     * JSON string contained unicode characters greater than "\u00ff".
     */
    static std::string toBytesValue(const std::string &sv) {
        return decodeString(sv, true);
    }

    static const char *const tokenNames[];

    /// Printable name of a token, looked up in tokenNames.
    static const char *toString(Token tk) {
        const size_t index = static_cast<size_t>(tk);
        return tokenNames[index];
    }
};
// Formatter that adds no layout at all: the constructor ignores the writer
// it is given and every hook is an empty function.
class AVRO_DECL JsonNullFormatter {
public:
explicit JsonNullFormatter(StreamWriter &) {}
// The hooks below intentionally do nothing.
void handleObjectStart() {}
void handleObjectEnd() {}
void handleValueEnd() {}
void handleColon() {}
};
class AVRO_DECL JsonPrettyFormatter {
StreamWriter &out_;
size_t level_;
std::vector<uint8_t> indent_;
static const int CHARS_PER_LEVEL = 2;
void printIndent() {
size_t charsToIndent = level_ * CHARS_PER_LEVEL;
if (indent_.size() < charsToIndent) {
indent_.resize(charsToIndent * 2, ' ');
}
out_.writeBytes(indent_.data(), charsToIndent);
}
public:
explicit JsonPrettyFormatter(StreamWriter &out) : out_(out), level_(0), indent_(10, ' ') {}
void handleObjectStart() {
out_.write('\n');
++level_;
printIndent();
}
void handleObjectEnd() {
out_.write('\n');
--level_;
printIndent();
}
void handleValueEnd() {
out_.write('\n');
printIndent();
}
void handleColon() {
out_.write(' ');
}
};
/**
 * Streaming JSON writer, parameterised on a formatter F that decides which
 * whitespace (if any) surrounds structural characters.
 *
 * The generator remembers whether it is inside an array or an object, and
 * for objects whether the next string is a key or a value, so that commas
 * and colons are inserted automatically by the encode* calls.
 */
template<class F>
class AVRO_DECL JsonGenerator {
    StreamWriter out_;
    F formatter_;
    enum State {
        stStart,  // top level, no container open
        stArray0, // inside an array, no element written yet
        stArrayN, // inside an array, at least one element written
        stMap0,   // inside an object, no key written yet
        stMapN,   // inside an object, a complete key/value pair was written
        stKey,    // inside an object, key written, value expected next
    };
    // States of the enclosing containers; `top` is the innermost one.
    std::stack<State> stateStack;
    State top;

    // Writes the byte range [b, p) verbatim; no-op for an empty range.
    void write(const char *b, const char *p) {
        if (b != p) {
            out_.writeBytes(reinterpret_cast<const uint8_t *>(b), p - b);
        }
    }

    // Flushes the pending literal run [b, p) and then writes "\<c>".
    void escape(char c, const char *b, const char *p) {
        write(b, p);
        out_.write('\\');
        out_.write(c);
    }

    // Writes a single byte as a \u00XX escape.
    void escapeCtl(char c) {
        escapeUnicode(static_cast<uint8_t>(c));
    }

    // Writes the two hex digits of the byte c.
    void writeHex(char c) {
        out_.write(toHex((static_cast<unsigned char>(c)) / 16));
        out_.write(toHex((static_cast<unsigned char>(c)) % 16));
    }

    // Writes one \uXXXX escape for a 16-bit code unit.
    void escapeUnicode16(uint32_t c) {
        out_.write('\\');
        out_.write('u');
        writeHex((c >> 8) & 0xff);
        writeHex(c & 0xff);
    }

    /**
     * Writes the JSON escape for code point c.
     *
     * Code points in the Basic Multilingual Plane become one \uXXXX escape.
     * Code points above U+FFFF cannot be expressed in four hex digits, so
     * they are written as a UTF-16 surrogate pair (\uD8xx\uDCxx) as JSON
     * requires; truncating them to 16 bits would silently emit a different
     * character. Values above U+10FFFF are not valid Unicode and are
     * rejected with an Exception.
     */
    void escapeUnicode(uint32_t c) {
        if (c < 0x10000) {
            escapeUnicode16(c);
        } else if (c < 0x110000) {
            c -= 0x10000;
            escapeUnicode16(0xd800 | ((c >> 10) & 0x3ff));
            escapeUnicode16(0xdc00 | (c & 0x3ff));
        } else {
            throw Exception("Invalid UTF-8 sequence");
        }
    }

    /**
     * Writes len bytes starting at b as a quoted JSON string.
     *
     * When binary is true every byte with the high bit set is written as
     * \u00XX. Otherwise the input is treated as UTF-8: each multi-byte
     * sequence is decoded and written as a \u escape, and a malformed
     * sequence raises an Exception. Quotes, backslashes, '/', and control
     * characters are always escaped. Runs of ordinary characters are
     * written in bulk; `b` tracks the start of the current pending run.
     */
    void doEncodeString(const char *b, size_t len, bool binary) {
        const char *e = b + len;
        out_.write('"');
        for (const char *p = b; p != e; p++) {
            if ((*p & 0x80) != 0) {
                // Non-ASCII byte: flush the pending run first.
                write(b, p);
                if (binary) {
                    escapeCtl(*p);
                } else if ((*p & 0x40) == 0) {
                    // A continuation byte cannot start a sequence.
                    throw Exception("Invalid UTF-8 sequence");
                } else {
                    // The lead byte determines how many continuation bytes follow.
                    int more = 1;
                    uint32_t value;
                    if ((*p & 0x20) != 0) {
                        more++;
                        if ((*p & 0x10) != 0) {
                            more++;
                            if ((*p & 0x08) != 0) {
                                throw Exception("Invalid UTF-8 sequence");
                            } else {
                                value = *p & 0x07;
                            }
                        } else {
                            value = *p & 0x0f;
                        }
                    } else {
                        value = *p & 0x1f;
                    }
                    for (int i = 0; i < more; ++i) {
                        if (++p == e || (*p & 0xc0) != 0x80) {
                            throw Exception("Invalid UTF-8 sequence");
                        }
                        value <<= 6;
                        value |= *p & 0x3f;
                    }
                    escapeUnicode(value);
                }
            } else {
                switch (*p) {
                    case '\\':
                    case '"':
                    case '/':
                        escape(*p, b, p);
                        break;
                    case '\b':
                        escape('b', b, p);
                        break;
                    case '\f':
                        escape('f', b, p);
                        break;
                    case '\n':
                        escape('n', b, p);
                        break;
                    case '\r':
                        escape('r', b, p);
                        break;
                    case '\t':
                        escape('t', b, p);
                        break;
                    default:
                        if (std::iscntrl(*p, std::locale::classic())) {
                            write(b, p);
                            escapeCtl(*p);
                            break;
                        } else {
                            // Ordinary character: keep extending the pending run.
                            continue;
                        }
                }
            }
            // Something was escaped; the next pending run starts after it.
            b = p + 1;
        }
        write(b, e);
        out_.write('"');
    }

    // Called before a value: emits the ',' separator between array elements.
    void sep() {
        if (top == stArrayN) {
            out_.write(',');
            formatter_.handleValueEnd();
        } else if (top == stArray0) {
            top = stArrayN;
        }
    }

    // Called after a value: an object value completes the current pair.
    void sep2() {
        if (top == stKey) {
            top = stMapN;
        }
    }

public:
    JsonGenerator() : formatter_(out_), top(stStart) {}

    // Directs subsequent output to os.
    void init(OutputStream &os) {
        out_.reset(os);
    }

    void flush() {
        out_.flush();
    }

    int64_t byteCount() const {
        return out_.byteCount();
    }

    void encodeNull() {
        sep();
        out_.writeBytes(reinterpret_cast<const uint8_t *>("null"), 4);
        sep2();
    }

    void encodeBool(bool b) {
        sep();
        if (b) {
            out_.writeBytes(reinterpret_cast<const uint8_t *>("true"), 4);
        } else {
            out_.writeBytes(reinterpret_cast<const uint8_t *>("false"), 5);
        }
        sep2();
    }

    // Writes t using boost::lexical_cast's textual form.
    template<typename T>
    void encodeNumber(T t) {
        sep();
        std::ostringstream oss;
        oss << boost::lexical_cast<std::string>(t);
        const std::string s = oss.str();
        out_.writeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
        sep2();
    }

    // Writes a double; non-finite values are written as the bare words
    // NaN, Infinity or -Infinity.
    void encodeNumber(double t) {
        sep();
        std::ostringstream oss;
        if (boost::math::isfinite(t)) {
            oss << boost::lexical_cast<std::string>(t);
        } else if (boost::math::isnan(t)) {
            oss << "NaN";
        } else if (t == std::numeric_limits<double>::infinity()) {
            oss << "Infinity";
        } else {
            oss << "-Infinity";
        }
        const std::string s = oss.str();
        out_.writeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
        sep2();
    }

    /**
     * Writes s as a JSON string. Inside an object the call alternates
     * between writing a key (followed by ':') and writing a value; a ','
     * is emitted before every key except the first.
     */
    void encodeString(const std::string &s) {
        if (top == stMap0) {
            top = stKey;
        } else if (top == stMapN) {
            out_.write(',');
            formatter_.handleValueEnd();
            top = stKey;
        } else if (top == stKey) {
            top = stMapN;
        } else {
            sep();
        }
        doEncodeString(s.c_str(), s.size(), false);
        if (top == stKey) {
            out_.write(':');
            formatter_.handleColon();
        }
    }

    // Writes len raw bytes as a JSON string, one character per byte.
    void encodeBinary(const uint8_t *bytes, size_t len) {
        sep();
        doEncodeString(reinterpret_cast<const char *>(bytes), len, true);
        sep2();
    }

    void arrayStart() {
        sep();
        stateStack.push(top);
        top = stArray0;
        out_.write('[');
        formatter_.handleObjectStart();
    }

    // Must be paired with a preceding arrayStart().
    void arrayEnd() {
        top = stateStack.top();
        stateStack.pop();
        formatter_.handleObjectEnd();
        out_.write(']');
        sep2();
    }

    void objectStart() {
        sep();
        stateStack.push(top);
        top = stMap0;
        out_.write('{');
        formatter_.handleObjectStart();
    }

    // Must be paired with a preceding objectStart().
    void objectEnd() {
        top = stateStack.top();
        stateStack.pop();
        formatter_.handleObjectEnd();
        out_.write('}');
        sep2();
    }
};
} // namespace json
} // namespace avro
#endif

View File

@ -1,661 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <boost/math/special_functions/fpclassify.hpp>
#include <map>
#include <memory>
#include <string>
#include "Decoder.hh"
#include "Encoder.hh"
#include "Symbol.hh"
#include "ValidSchema.hh"
#include "ValidatingCodec.hh"
#include "../json/JsonIO.hh"
namespace avro {
namespace parsing {
using std::make_shared;
using std::istringstream;
using std::map;
using std::ostringstream;
using std::reverse;
using std::string;
using std::vector;
using avro::json::JsonGenerator;
using avro::json::JsonNullFormatter;
using avro::json::JsonParser;
// Grammar generator for the JSON codec. It reuses the validating grammar
// for most types and overrides production generation for records, enums
// and unions (see doGenerate below).
class JsonGrammarGenerator : public ValidatingGrammarGenerator {
// n is the schema node to generate for; m maps nodes to productions
// that have already been generated.
ProductionPtr doGenerate(const NodePtr &n,
std::map<NodePtr, ProductionPtr> &m) final;
};
// Returns the label used for a union branch: the node's own name when it has
// one, otherwise the streamed textual form of its type.
static std::string nameOf(const NodePtr &n) {
    if (!n->hasName()) {
        std::ostringstream typeText;
        typeText << n->type();
        return typeText.str();
    }
    return std::string(n->name());
}
// Builds the production for schema node n.
// Primitive types, fixed, array, map and symbolic nodes are delegated to
// ValidatingGrammarGenerator; records, enums and unions get JSON-specific
// productions. Throws Exception for an unrecognised node type.
ProductionPtr JsonGrammarGenerator::doGenerate(const NodePtr &n,
std::map<NodePtr, ProductionPtr> &m) {
switch (n->type()) {
case AVRO_NULL:
case AVRO_BOOL:
case AVRO_INT:
case AVRO_LONG:
case AVRO_FLOAT:
case AVRO_DOUBLE:
case AVRO_STRING:
case AVRO_BYTES:
case AVRO_FIXED:
case AVRO_ARRAY:
case AVRO_MAP:
case AVRO_SYMBOLIC:
return ValidatingGrammarGenerator::doGenerate(n, m);
case AVRO_RECORD: {
ProductionPtr result = make_shared<Production>();
m.erase(n);
size_t c = n->leaves();
// Room for the start/end markers plus at least a name symbol and one
// value symbol per field.
result->reserve(2 + 2 * c);
// The production is assembled front-to-back here and reversed as a
// whole below, which is why each field's own production is appended
// in reverse.
result->push_back(Symbol::recordStartSymbol());
for (size_t i = 0; i < c; ++i) {
const NodePtr &leaf = n->leafAt(i);
ProductionPtr v = doGenerate(leaf, m);
result->push_back(Symbol::fieldSymbol(n->nameAt(i)));
copy(v->rbegin(), v->rend(), back_inserter(*result));
}
result->push_back(Symbol::recordEndSymbol());
reverse(result->begin(), result->end());
m[n] = result;
// Callers receive a one-symbol production that refers to the record
// production indirectly.
return make_shared<Production>(1, Symbol::indirect(result));
}
case AVRO_ENUM: {
// Collect the enum's symbol names in declaration order.
vector<string> nn;
size_t c = n->names();
nn.reserve(c);
for (size_t i = 0; i < c; ++i) {
nn.push_back(n->nameAt(i));
}
ProductionPtr result = make_shared<Production>();
result->push_back(Symbol::nameListSymbol(nn));
result->push_back(Symbol::enumSymbol());
m[n] = result;
return result;
}
case AVRO_UNION: {
size_t c = n->leaves();
vector<ProductionPtr> vv;
vv.reserve(c);
vector<string> names;
names.reserve(c);
for (size_t i = 0; i < c; ++i) {
const NodePtr &nn = n->leafAt(i);
ProductionPtr v = doGenerate(nn, m);
if (nn->type() != AVRO_NULL) {
// Every non-null branch gets a record-end symbol placed ahead of the
// branch's own symbols; the encoder and decoder open an object for
// such branches when the union index is processed.
ProductionPtr v2 = make_shared<Production>();
v2->push_back(Symbol::recordEndSymbol());
copy(v->begin(), v->end(), back_inserter(*v2));
v.swap(v2);
}
vv.push_back(v);
names.push_back(nameOf(nn));
}
ProductionPtr result = make_shared<Production>();
result->push_back(Symbol::alternative(vv));
result->push_back(Symbol::nameListSymbol(names));
result->push_back(Symbol::unionSymbol());
return result;
}
default:
throw Exception("Unknown node type");
}
}
// Free-function wrapper that forwards to JsonParser::expectToken(tk) on the
// given parser.
static void expectToken(JsonParser &in, JsonParser::Token tk) {
in.expectToken(tk);
}
class JsonDecoderHandler {
JsonParser &in_;
public:
explicit JsonDecoderHandler(JsonParser &p) : in_(p) {}
size_t handle(const Symbol &s) {
switch (s.kind()) {
case Symbol::Kind::RecordStart:
expectToken(in_, JsonParser::Token::ObjectStart);
break;
case Symbol::Kind::RecordEnd:
expectToken(in_, JsonParser::Token::ObjectEnd);
break;
case Symbol::Kind::Field:
expectToken(in_, JsonParser::Token::String);
if (s.extra<string>() != in_.stringValue()) {
throw Exception(boost::format("Incorrect field: expected \"%1%\" but got \"%2%\".") %
s.extra<string>() % in_.stringValue());
}
break;
default:
break;
}
return 0;
}
};
// Decoder that reads Avro data from its JSON encoding. P is the grammar
// parser type that checks the sequence of decode calls against the schema.
template<typename P>
class JsonDecoder : public Decoder {
// Declaration order matters: handler_ is constructed from in_, and
// parser_ is constructed from handler_.
JsonParser in_;
JsonDecoderHandler handler_;
P parser_;
void init(InputStream &is) final;
void decodeNull() final;
bool decodeBool() final;
int32_t decodeInt() final;
int64_t decodeLong() final;
float decodeFloat() final;
double decodeDouble() final;
void decodeString(string &value) final;
void skipString() final;
void decodeBytes(vector<uint8_t> &value) final;
void skipBytes() final;
void decodeFixed(size_t n, vector<uint8_t> &value) final;
void skipFixed(size_t n) final;
size_t decodeEnum() final;
size_t arrayStart() final;
size_t arrayNext() final;
size_t skipArray() final;
size_t mapStart() final;
size_t mapNext() final;
size_t skipMap() final;
size_t decodeUnionIndex() final;
// Helpers that are not part of the Decoder interface.
void expect(JsonParser::Token tk);
void skipComposite();
void drain() final;
public:
// Builds the JSON grammar for schema s and binds the parser to handler_.
explicit JsonDecoder(const ValidSchema &s) : handler_(in_),
parser_(JsonGrammarGenerator().generate(s), NULL, handler_) {}
};
// Points the JSON tokenizer at a new input stream and resets the grammar
// parser.
template<typename P>
void JsonDecoder<P>::init(InputStream &is) {
in_.init(is);
parser_.reset();
}
// Requires the next JSON token to be tk (delegates to the tokenizer).
template<typename P>
void JsonDecoder<P>::expect(JsonParser::Token tk) {
    in_.expectToken(tk);
}
// Decodes a null: advances the grammar, then requires a JSON null token.
template<typename P>
void JsonDecoder<P>::decodeNull() {
    parser_.advance(Symbol::Kind::Null);
    in_.expectToken(JsonParser::Token::Null);
}
// Decodes a boolean: advances the grammar, requires a JSON boolean token and
// returns its value.
template<typename P>
bool JsonDecoder<P>::decodeBool() {
    parser_.advance(Symbol::Kind::Bool);
    expect(JsonParser::Token::Bool);
    return in_.boolValue();
}
// Decodes an Avro int. The JSON tokenizer yields a 64-bit value, so the
// result is range-checked; throws Exception when it does not fit in 32 bits.
template<typename P>
int32_t JsonDecoder<P>::decodeInt() {
    parser_.advance(Symbol::Kind::Int);
    expect(JsonParser::Token::Long);
    const int64_t wide = in_.longValue();
    const bool fits = wide >= INT32_MIN && wide <= INT32_MAX;
    if (!fits) {
        throw Exception(boost::format("Value out of range for Avro int: %1%")
                        % wide);
    }
    return static_cast<int32_t>(wide);
}
// Decodes an Avro long from a JSON integer token.
template<typename P>
int64_t JsonDecoder<P>::decodeLong() {
    parser_.advance(Symbol::Kind::Long);
    expect(JsonParser::Token::Long);
    return in_.longValue();
}
// Decodes an Avro float: reads the tokenizer's double value and narrows it.
template<typename P>
float JsonDecoder<P>::decodeFloat() {
    parser_.advance(Symbol::Kind::Float);
    expect(JsonParser::Token::Double);
    const double wide = in_.doubleValue();
    return static_cast<float>(wide);
}
// Decodes an Avro double from a JSON floating-point token.
template<typename P>
double JsonDecoder<P>::decodeDouble() {
    parser_.advance(Symbol::Kind::Double);
    expect(JsonParser::Token::Double);
    return in_.doubleValue();
}
// Decodes an Avro string into value, replacing its previous contents.
template<typename P>
void JsonDecoder<P>::decodeString(string &value) {
    parser_.advance(Symbol::Kind::String);
    in_.expectToken(JsonParser::Token::String);
    value = in_.stringValue();
}
// Skips an Avro string: the token is consumed but its value is not decoded.
template<typename P>
void JsonDecoder<P>::skipString() {
    parser_.advance(Symbol::Kind::String);
    in_.expectToken(JsonParser::Token::String);
}
// Copies the characters of s, one byte each, into a byte vector.
static vector<uint8_t> toBytes(const string &s) {
    vector<uint8_t> bytes;
    bytes.assign(s.begin(), s.end());
    return bytes;
}
// Decodes Avro bytes (carried as a JSON string) into value.
template<typename P>
void JsonDecoder<P>::decodeBytes(vector<uint8_t> &value) {
    parser_.advance(Symbol::Kind::Bytes);
    in_.expectToken(JsonParser::Token::String);
    const string raw = in_.bytesValue();
    value = toBytes(raw);
}
// Skips Avro bytes: the string token is consumed but not decoded.
template<typename P>
void JsonDecoder<P>::skipBytes() {
    parser_.advance(Symbol::Kind::Bytes);
    in_.expectToken(JsonParser::Token::String);
}
// Decodes an Avro fixed of n bytes into value. Throws Exception when the
// decoded string does not contain exactly n bytes.
template<typename P>
void JsonDecoder<P>::decodeFixed(size_t n, vector<uint8_t> &value) {
    parser_.advance(Symbol::Kind::Fixed);
    parser_.assertSize(n);
    expect(JsonParser::Token::String);
    value = toBytes(in_.bytesValue());
    const bool sizeMatches = (value.size() == n);
    if (!sizeMatches) {
        throw Exception("Incorrect value for fixed");
    }
}
// Skips an Avro fixed of n bytes. The value is still decoded so its length
// can be verified; throws Exception when it is not exactly n bytes.
template<typename P>
void JsonDecoder<P>::skipFixed(size_t n) {
    parser_.advance(Symbol::Kind::Fixed);
    parser_.assertSize(n);
    expect(JsonParser::Token::String);
    const vector<uint8_t> discarded = toBytes(in_.bytesValue());
    if (discarded.size() == n) {
        return;
    }
    throw Exception("Incorrect value for fixed");
}
// Decodes an Avro enum: reads the symbol name as a JSON string and returns
// the index the grammar parser associates with that name.
template<typename P>
size_t JsonDecoder<P>::decodeEnum() {
    parser_.advance(Symbol::Kind::Enum);
    expect(JsonParser::Token::String);
    return parser_.indexForName(in_.stringValue());
}
// Begins decoding an array. Consumes '[' and returns what arrayNext()
// reports: 0 for an empty array, otherwise 1.
template<typename P>
size_t JsonDecoder<P>::arrayStart() {
    parser_.advance(Symbol::Kind::ArrayStart);
    parser_.pushRepeatCount(0);
    in_.expectToken(JsonParser::Token::ArrayStart);
    const size_t available = arrayNext();
    return available;
}
// Reports whether another array element follows. JSON arrays carry no
// length, so elements are announced one at a time: returns 1 while an
// element is pending and 0 once the closing ']' has been consumed.
template<typename P>
size_t JsonDecoder<P>::arrayNext() {
    parser_.processImplicitActions();
    if (in_.peek() != JsonParser::Token::ArrayEnd) {
        parser_.nextRepeatCount(1);
        return 1;
    }
    in_.advance();
    parser_.popRepeater();
    parser_.advance(Symbol::Kind::ArrayEnd);
    return 0;
}
// Consumes tokens up to and including the closing bracket that matches an
// already-consumed opening '[' or '{', stepping over nested containers.
template<typename P>
void JsonDecoder<P>::skipComposite() {
    size_t depth = 0;
    while (true) {
        const JsonParser::Token tk = in_.advance();
        const bool opens = tk == JsonParser::Token::ArrayStart
            || tk == JsonParser::Token::ObjectStart;
        const bool closes = tk == JsonParser::Token::ArrayEnd
            || tk == JsonParser::Token::ObjectEnd;
        if (opens) {
            ++depth;
        } else if (closes) {
            if (depth == 0) {
                return;
            }
            --depth;
        }
        // Any other token is a scalar inside the container: ignore it.
    }
}
// Runs any pending implicit grammar actions, then drains the JSON
// tokenizer.
template<typename P>
void JsonDecoder<P>::drain() {
parser_.processImplicitActions();
in_.drain();
}
// Skips a whole array. The grammar is moved past the array first, then the
// JSON input is consumed through the matching ']'. Always returns 0.
template<typename P>
size_t JsonDecoder<P>::skipArray() {
    parser_.advance(Symbol::Kind::ArrayStart);
    parser_.pop();
    parser_.advance(Symbol::Kind::ArrayEnd);
    in_.expectToken(JsonParser::Token::ArrayStart);
    skipComposite();
    return 0;
}
// Begins decoding a map. Consumes '{' and returns what mapNext() reports:
// 0 for an empty map, otherwise 1.
template<typename P>
size_t JsonDecoder<P>::mapStart() {
    parser_.advance(Symbol::Kind::MapStart);
    parser_.pushRepeatCount(0);
    in_.expectToken(JsonParser::Token::ObjectStart);
    const size_t available = mapNext();
    return available;
}
// Reports whether another map entry follows: returns 1 while an entry is
// pending and 0 once the closing '}' has been consumed.
template<typename P>
size_t JsonDecoder<P>::mapNext() {
    parser_.processImplicitActions();
    if (in_.peek() != JsonParser::Token::ObjectEnd) {
        parser_.nextRepeatCount(1);
        return 1;
    }
    in_.advance();
    parser_.popRepeater();
    parser_.advance(Symbol::Kind::MapEnd);
    return 0;
}
// Skips a whole map. The grammar is moved past the map first, then the JSON
// input is consumed through the matching '}'. Always returns 0.
template<typename P>
size_t JsonDecoder<P>::skipMap() {
    parser_.advance(Symbol::Kind::MapStart);
    parser_.pop();
    parser_.advance(Symbol::Kind::MapEnd);
    in_.expectToken(JsonParser::Token::ObjectStart);
    skipComposite();
    return 0;
}
// Decodes which union branch is present and selects it in the grammar.
// A bare JSON null selects the branch named "null" (the null token itself is
// left for the following decodeNull). Any other branch is written as an
// object whose single key is the branch name; the '{' and the key are
// consumed here.
template<typename P>
size_t JsonDecoder<P>::decodeUnionIndex() {
    parser_.advance(Symbol::Kind::Union);
    const bool isNullBranch = (in_.peek() == JsonParser::Token::Null);
    if (!isNullBranch) {
        expect(JsonParser::Token::ObjectStart);
        expect(JsonParser::Token::String);
    }
    const size_t branch = isNullBranch
        ? parser_.indexForName("null")
        : parser_.indexForName(in_.stringValue());
    parser_.selectBranch(branch);
    return branch;
}
// Handler invoked by the grammar parser while encoding JSON. It emits the
// object delimiters and field names that the schema implies for records;
// all other symbols are ignored.
template<typename F = JsonNullFormatter>
class JsonHandler {
    JsonGenerator<F> &generator_;

public:
    explicit JsonHandler(JsonGenerator<F> &g) : generator_(g) {}

    // Processes symbol s and always returns 0.
    size_t handle(const Symbol &s) {
        const auto kind = s.kind();
        if (kind == Symbol::Kind::RecordStart) {
            generator_.objectStart();
        } else if (kind == Symbol::Kind::RecordEnd) {
            generator_.objectEnd();
        } else if (kind == Symbol::Kind::Field) {
            generator_.encodeString(s.extra<string>());
        }
        return 0;
    }
};
// Encoder that writes Avro data in its JSON encoding. P is the grammar
// parser type that checks the sequence of encode calls against the schema;
// F is the formatter passed on to the JSON generator.
template<typename P, typename F = JsonNullFormatter>
class JsonEncoder : public Encoder {
// Declaration order matters: handler_ is constructed from out_, and
// parser_ is constructed from handler_.
JsonGenerator<F> out_;
JsonHandler<F> handler_;
P parser_;
void init(OutputStream &os) final;
void flush() final;
int64_t byteCount() const final;
void encodeNull() final;
void encodeBool(bool b) final;
void encodeInt(int32_t i) final;
void encodeLong(int64_t l) final;
void encodeFloat(float f) final;
void encodeDouble(double d) final;
void encodeString(const std::string &s) final;
void encodeBytes(const uint8_t *bytes, size_t len) final;
void encodeFixed(const uint8_t *bytes, size_t len) final;
void encodeEnum(size_t e) final;
void arrayStart() final;
void arrayEnd() final;
void mapStart() final;
void mapEnd() final;
void setItemCount(size_t count) final;
void startItem() final;
void encodeUnionIndex(size_t e) final;
public:
// Builds the JSON grammar for the schema and binds the parser to handler_.
explicit JsonEncoder(const ValidSchema &schema) : handler_(out_),
parser_(JsonGrammarGenerator().generate(schema), NULL, handler_) {}
};
// Directs the JSON generator's output to os.
// NOTE(review): unlike JsonDecoder::init, the grammar parser is not reset
// here; confirm that this is intended.
template<typename P, typename F>
void JsonEncoder<P, F>::init(OutputStream &os) {
out_.init(os);
}
// Runs any pending implicit grammar actions, then flushes the generator.
template<typename P, typename F>
void JsonEncoder<P, F>::flush() {
parser_.processImplicitActions();
out_.flush();
}
// Returns the byte count reported by the JSON generator.
template<typename P, typename F>
int64_t JsonEncoder<P, F>::byteCount() const {
return out_.byteCount();
}
// Advances the grammar past a null and writes a JSON null.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeNull() {
parser_.advance(Symbol::Kind::Null);
out_.encodeNull();
}
// Advances the grammar past a boolean and writes b as a JSON boolean.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeBool(bool b) {
parser_.advance(Symbol::Kind::Bool);
out_.encodeBool(b);
}
// Advances the grammar past an int and writes i as a JSON number.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeInt(int32_t i) {
parser_.advance(Symbol::Kind::Int);
out_.encodeNumber(i);
}
// Advances the grammar past a long and writes l as a JSON number.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeLong(int64_t l) {
parser_.advance(Symbol::Kind::Long);
out_.encodeNumber(l);
}
// Encodes an Avro float. Finite values are written as JSON numbers; the
// non-finite values, which JSON numbers cannot express, are written as the
// strings "Infinity", "-Infinity" and "NaN".
template<typename P, typename F>
void JsonEncoder<P, F>::encodeFloat(float f) {
    parser_.advance(Symbol::Kind::Float);
    const float inf = std::numeric_limits<float>::infinity();
    const char *special = nullptr;
    if (f == inf) {
        special = "Infinity";
    } else if (-f == inf) {
        special = "-Infinity";
    } else if (boost::math::isnan(f)) {
        special = "NaN";
    }
    if (special != nullptr) {
        out_.encodeString(special);
    } else {
        out_.encodeNumber(f);
    }
}
// Encodes an Avro double. Finite values are written as JSON numbers; the
// non-finite values, which JSON numbers cannot express, are written as the
// strings "Infinity", "-Infinity" and "NaN".
template<typename P, typename F>
void JsonEncoder<P, F>::encodeDouble(double d) {
    parser_.advance(Symbol::Kind::Double);
    const double inf = std::numeric_limits<double>::infinity();
    const char *special = nullptr;
    if (d == inf) {
        special = "Infinity";
    } else if (-d == inf) {
        special = "-Infinity";
    } else if (boost::math::isnan(d)) {
        special = "NaN";
    }
    if (special != nullptr) {
        out_.encodeString(special);
    } else {
        out_.encodeNumber(d);
    }
}
// Advances the grammar past a string and writes s as a JSON string.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeString(const std::string &s) {
parser_.advance(Symbol::Kind::String);
out_.encodeString(s);
}
// Advances the grammar past a bytes value and writes the len bytes at
// `bytes` through the generator's binary string encoding.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeBytes(const uint8_t *bytes, size_t len) {
parser_.advance(Symbol::Kind::Bytes);
out_.encodeBinary(bytes, len);
}
// Advances the grammar past a fixed value, has the parser check len
// against the schema's size, and writes the bytes through the generator's
// binary string encoding.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeFixed(const uint8_t *bytes, size_t len) {
parser_.advance(Symbol::Kind::Fixed);
parser_.assertSize(len);
out_.encodeBinary(bytes, len);
}
// Encodes an Avro enum: writes the symbol name the grammar parser associates
// with index e as a JSON string.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeEnum(size_t e) {
    parser_.advance(Symbol::Kind::Enum);
    out_.encodeString(parser_.nameForIndex(e));
}
// Advances the grammar into an array, pushes an initial repeat count of 0,
// and opens a JSON array.
template<typename P, typename F>
void JsonEncoder<P, F>::arrayStart() {
parser_.advance(Symbol::Kind::ArrayStart);
parser_.pushRepeatCount(0);
out_.arrayStart();
}
// Pops the array's repeater, advances the grammar past the array end, and
// closes the JSON array.
template<typename P, typename F>
void JsonEncoder<P, F>::arrayEnd() {
parser_.popRepeater();
parser_.advance(Symbol::Kind::ArrayEnd);
out_.arrayEnd();
}
// Advances the grammar into a map, pushes an initial repeat count of 0,
// and opens a JSON object.
template<typename P, typename F>
void JsonEncoder<P, F>::mapStart() {
parser_.advance(Symbol::Kind::MapStart);
parser_.pushRepeatCount(0);
out_.objectStart();
}
// Pops the map's repeater, advances the grammar past the map end, and
// closes the JSON object.
template<typename P, typename F>
void JsonEncoder<P, F>::mapEnd() {
parser_.popRepeater();
parser_.advance(Symbol::Kind::MapEnd);
out_.objectEnd();
}
// Passes the announced item count to the grammar parser; nothing is
// written to the JSON output.
template<typename P, typename F>
void JsonEncoder<P, F>::setItemCount(size_t count) {
parser_.nextRepeatCount(count);
}
// Marks the start of an array or map item. Throws Exception when the grammar
// is not positioned at a repeater, i.e. not at an item boundary.
template<typename P, typename F>
void JsonEncoder<P, F>::startItem() {
    parser_.processImplicitActions();
    if (parser_.top() == Symbol::Kind::Repeater) {
        return;
    }
    throw Exception("startItem at not an item boundary");
}
// Encodes the selected union branch. The branch named "null" writes nothing
// here; every other branch opens an object and writes the branch name as
// its key. The grammar is then switched to branch e.
template<typename P, typename F>
void JsonEncoder<P, F>::encodeUnionIndex(size_t e) {
    parser_.advance(Symbol::Kind::Union);
    const std::string branchName = parser_.nameForIndex(e);
    const bool needsWrapper = !(branchName == "null");
    if (needsWrapper) {
        out_.objectStart();
        out_.encodeString(branchName);
    }
    parser_.selectBranch(e);
}
} // namespace parsing
// Creates a decoder that reads the JSON encoding of data conforming to s.
DecoderPtr jsonDecoder(const ValidSchema &s) {
    typedef parsing::SimpleParser<parsing::JsonDecoderHandler> Parser;
    typedef parsing::JsonDecoder<Parser> DecoderImpl;
    return std::make_shared<DecoderImpl>(s);
}
// Creates an encoder that writes compact JSON (no added whitespace) for data
// conforming to schema.
EncoderPtr jsonEncoder(const ValidSchema &schema) {
    typedef avro::json::JsonNullFormatter Formatter;
    typedef parsing::SimpleParser<parsing::JsonHandler<Formatter>> Parser;
    typedef parsing::JsonEncoder<Parser, Formatter> EncoderImpl;
    return std::make_shared<EncoderImpl>(schema);
}
// Creates an encoder that writes indented, multi-line JSON for data
// conforming to schema.
EncoderPtr jsonPrettyEncoder(const ValidSchema &schema) {
    typedef avro::json::JsonPrettyFormatter Formatter;
    typedef parsing::SimpleParser<parsing::JsonHandler<Formatter>> Parser;
    typedef parsing::JsonEncoder<Parser, Formatter> EncoderImpl;
    return std::make_shared<EncoderImpl>(schema);
}
} // namespace avro

View File

@ -1,683 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include "Decoder.hh"
#include "Encoder.hh"
#include "Generic.hh"
#include "NodeImpl.hh"
#include "Stream.hh"
#include "Symbol.hh"
#include "Types.hh"
#include "ValidSchema.hh"
#include "ValidatingCodec.hh"
namespace avro {
using std::make_shared;
namespace parsing {
using std::make_shared;
using std::shared_ptr;
using std::static_pointer_cast;
using std::find_if;
using std::istringstream;
using std::make_pair;
using std::map;
using std::ostringstream;
using std::pair;
using std::reverse;
using std::stack;
using std::string;
using std::unique_ptr;
using std::vector;
typedef pair<NodePtr, NodePtr> NodePair;
// Generates the grammar used to decode data written with one schema (the
// writer's) as if it had been written with another (the reader's).
//
// Throughout, `m` caches productions per (writer node, reader node) pair and
// `m2` caches writer-only productions per node.
class ResolvingGrammarGenerator : public ValidatingGrammarGenerator {
    ProductionPtr doGenerate2(const NodePtr &writer, const NodePtr &reader,
                              map<NodePair, ProductionPtr> &m,
                              map<NodePtr, ProductionPtr> &m2);

    ProductionPtr resolveRecords(const NodePtr &writer, const NodePtr &reader,
                                 map<NodePair, ProductionPtr> &m,
                                 map<NodePtr, ProductionPtr> &m2);

    ProductionPtr resolveUnion(const NodePtr &writer, const NodePtr &reader,
                               map<NodePair, ProductionPtr> &m,
                               map<NodePtr, ProductionPtr> &m2);

    // Returns (name, position) for every name of node n, in declaration order.
    static vector<pair<string, size_t>> fields(const NodePtr &n) {
        vector<pair<string, size_t>> named;
        const size_t count = n->names();
        for (size_t idx = 0; idx != count; ++idx) {
            named.emplace_back(n->nameAt(idx), idx);
        }
        return named;
    }

    static int bestBranch(const NodePtr &writer, const NodePtr &reader);

    ProductionPtr getWriterProduction(const NodePtr &n,
                                      map<NodePtr, ProductionPtr> &m2);

public:
    Symbol generate(const ValidSchema &writer, const ValidSchema &reader);
};
// Builds the root symbol for resolving `writer` data into `reader` form.
// Two grammars are produced: a writer-only "backup" grammar and the main
// resolving grammar; both are passed to Symbol::rootSymbol.
Symbol ResolvingGrammarGenerator::generate(
    const ValidSchema &writer, const ValidSchema &reader) {
    map<NodePtr, ProductionPtr> writerProductions;
    const NodePtr &readerRoot = reader.root();
    const NodePtr &writerRoot = writer.root();

    ProductionPtr backup =
        ValidatingGrammarGenerator::doGenerate(writerRoot, writerProductions);
    fixup(backup, writerProductions);

    map<NodePair, ProductionPtr> resolvedProductions;
    ProductionPtr main =
        doGenerate2(writerRoot, readerRoot, resolvedProductions, writerProductions);
    fixup(main, resolvedProductions);

    return Symbol::rootSymbol(main, backup);
}
int ResolvingGrammarGenerator::bestBranch(const NodePtr &writer,
const NodePtr &reader) {
Type t = writer->type();
const size_t c = reader->leaves();
for (size_t j = 0; j < c; ++j) {
NodePtr r = reader->leafAt(j);
if (r->type() == AVRO_SYMBOLIC) {
r = resolveSymbol(r);
}
if (t == r->type()) {
if (r->hasName()) {
if (r->name() == writer->name()) {
return j;
}
} else {
return j;
}
}
}
for (size_t j = 0; j < c; ++j) {
const NodePtr &r = reader->leafAt(j);
Type rt = r->type();
switch (t) {
case AVRO_INT:
if (rt == AVRO_LONG || rt == AVRO_DOUBLE || rt == AVRO_FLOAT) {
return j;
}
break;
case AVRO_LONG:
case AVRO_FLOAT:
if (rt == AVRO_DOUBLE) {
return j;
}
break;
default:
break;
}
}
return -1;
}
// Serialises defaultValue with the Avro binary encoder into memory and
// returns a snapshot of the resulting bytes.
static shared_ptr<vector<uint8_t>> getAvroBinary(
    const GenericDatum &defaultValue) {
    EncoderPtr encoder = binaryEncoder();
    unique_ptr<OutputStream> sink = memoryOutputStream();
    encoder->init(*sink);
    GenericWriter::write(*encoder, defaultValue);
    // Flush before taking the snapshot so the stream holds the encoder's output.
    encoder->flush();
    return snapshot(*sink);
}
// Predicate that is true for a pair whose first element equals a given value.
// Holds a reference to the value, so the value must outlive the predicate.
template<typename T1, typename T2>
struct equalsFirst {
    const T1 &v_;

    explicit equalsFirst(const T1 &v) : v_(v) {}

    bool operator()(const pair<T1, T2> &p) {
        const bool matches = (p.first == v_);
        return matches;
    }
};
// Returns the writer-only production for node n (used to skip data the
// reader does not want). Symbolic nodes are first replaced by the node they
// refer to. Productions already present in m2 are reused; otherwise one is
// generated and fixed up.
ProductionPtr ResolvingGrammarGenerator::getWriterProduction(
    const NodePtr &n, map<NodePtr, ProductionPtr> &m2) {
    const NodePtr &nn = (n->type() == AVRO_SYMBOLIC) ? static_cast<const NodeSymbolic &>(*n).getNode() : n;
    const auto cached = m2.find(nn);
    if (cached != m2.end()) {
        return cached->second;
    }
    ProductionPtr generated = ValidatingGrammarGenerator::doGenerate(nn, m2);
    fixup(generated, m2);
    return generated;
}
// Builds the production that resolves a writer record into a reader record.
//
// Writer fields are processed in writer order: a field the reader also has
// is resolved recursively, a field the reader lacks is skipped. Reader
// fields with no writer counterpart are then filled in from their default
// values. fieldOrder records the reader-field positions in the order in
// which their values will be produced.
ProductionPtr ResolvingGrammarGenerator::resolveRecords(
    const NodePtr &writer, const NodePtr &reader,
    map<NodePair, ProductionPtr> &m,
    map<NodePtr, ProductionPtr> &m2) {
    ProductionPtr result = make_shared<Production>();

    const vector<pair<string, size_t>> writerFields = fields(writer);
    // Reader fields not yet matched to a writer field.
    vector<pair<string, size_t>> unmatchedReaderFields = fields(reader);

    vector<size_t> fieldOrder;
    fieldOrder.reserve(reader->names());

    for (const auto &wf : writerFields) {
        auto match = find_if(unmatchedReaderFields.begin(),
                             unmatchedReaderFields.end(),
                             equalsFirst<string, size_t>(wf.first));
        if (match == unmatchedReaderFields.end()) {
            // The reader has no such field: arrange to skip the writer's data.
            ProductionPtr p = getWriterProduction(writer->leafAt(wf.second), m2);
            result->push_back(Symbol::skipStart());
            if (p->size() == 1) {
                result->push_back((*p)[0]);
            } else {
                result->push_back(Symbol::indirect(p));
            }
            continue;
        }
        // Both sides have the field: resolve it and mark it as handled.
        ProductionPtr p = doGenerate2(writer->leafAt(wf.second),
                                      reader->leafAt(match->second), m, m2);
        std::copy(p->rbegin(), p->rend(), std::back_inserter(*result));
        fieldOrder.push_back(match->second);
        unmatchedReaderFields.erase(match);
    }

    // Reader fields the writer never wrote are decoded from their defaults.
    for (const auto &rf : unmatchedReaderFields) {
        NodePtr s = reader->leafAt(rf.second);
        fieldOrder.push_back(rf.second);
        if (s->type() == AVRO_SYMBOLIC) {
            s = resolveSymbol(s);
        }
        shared_ptr<vector<uint8_t>> defaultBinary =
            getAvroBinary(reader->defaultValueAt(rf.second));
        result->push_back(Symbol::defaultStartAction(defaultBinary));
        const auto known = m.find(NodePair(s, s));
        ProductionPtr p = (known == m.end()) ? doGenerate2(s, s, m, m2) : known->second;
        std::copy(p->rbegin(), p->rend(), std::back_inserter(*result));
        result->push_back(Symbol::defaultEndAction());
    }

    // Everything above was appended in forward order; reverse it, then
    // append the field-order list and the record action.
    reverse(result->begin(), result->end());
    result->push_back(Symbol::sizeListAction(fieldOrder));
    result->push_back(Symbol::recordAction());
    return result;
}
// Builds the production for a writer union: one alternative per writer
// branch, each resolved against the whole reader node, followed by the
// writer-union action.
ProductionPtr ResolvingGrammarGenerator::resolveUnion(
    const NodePtr &writer, const NodePtr &reader,
    map<NodePair, ProductionPtr> &m,
    map<NodePtr, ProductionPtr> &m2) {
    const size_t branchCount = writer->leaves();
    vector<ProductionPtr> branches;
    branches.reserve(branchCount);
    for (size_t branch = 0; branch != branchCount; ++branch) {
        branches.push_back(doGenerate2(writer->leafAt(branch), reader, m, m2));
    }

    ProductionPtr result = make_shared<Production>();
    result->push_back(Symbol::alternative(branches));
    result->push_back(Symbol::writerUnionAction());
    return result;
}
// Builds the production that resolves writer node w against reader node r.
// m caches productions per (writer, reader) node pair; m2 caches
// writer-only productions. When the two nodes cannot be resolved, the
// function falls through to the end and returns a single error symbol
// rather than throwing. Throws Exception only for an unrecognised node
// type.
ProductionPtr ResolvingGrammarGenerator::doGenerate2(
const NodePtr &w, const NodePtr &r,
map<NodePair, ProductionPtr> &m,
map<NodePtr, ProductionPtr> &m2) {
// Work on the referenced nodes when either side is a symbolic reference.
const NodePtr writer = w->type() == AVRO_SYMBOLIC ? resolveSymbol(w) : w;
const NodePtr reader = r->type() == AVRO_SYMBOLIC ? resolveSymbol(r) : r;
Type writerType = writer->type();
Type readerType = reader->type();
if (writerType == readerType) {
// Same type on both sides.
switch (writerType) {
case AVRO_NULL:
return make_shared<Production>(1, Symbol::nullSymbol());
case AVRO_BOOL:
return make_shared<Production>(1, Symbol::boolSymbol());
case AVRO_INT:
return make_shared<Production>(1, Symbol::intSymbol());
case AVRO_LONG:
return make_shared<Production>(1, Symbol::longSymbol());
case AVRO_FLOAT:
return make_shared<Production>(1, Symbol::floatSymbol());
case AVRO_DOUBLE:
return make_shared<Production>(1, Symbol::doubleSymbol());
case AVRO_STRING:
return make_shared<Production>(1, Symbol::stringSymbol());
case AVRO_BYTES:
return make_shared<Production>(1, Symbol::bytesSymbol());
case AVRO_FIXED:
// Fixed types match only when both name and size agree; otherwise
// break out to the error symbol at the end.
if (writer->name() == reader->name() && writer->fixedSize() == reader->fixedSize()) {
ProductionPtr result = make_shared<Production>();
result->push_back(Symbol::sizeCheckSymbol(reader->fixedSize()));
result->push_back(Symbol::fixedSymbol());
m[make_pair(writer, reader)] = result;
return result;
}
break;
case AVRO_RECORD:
if (writer->name() == reader->name()) {
const pair<NodePtr, NodePtr> key(writer, reader);
map<NodePair, ProductionPtr>::const_iterator kp = m.find(key);
if (kp != m.end()) {
// An entry with an empty production means this pair is currently
// being generated (recursive schema): return a placeholder for it.
return (kp->second) ? kp->second : make_shared<Production>(1, Symbol::placeholder(key));
}
// Mark the pair as in progress before recursing into its fields.
m[key] = ProductionPtr();
ProductionPtr result = resolveRecords(writer, reader, m, m2);
m[key] = result;
return make_shared<Production>(1, Symbol::indirect(result));
}
break;
case AVRO_ENUM:
if (writer->name() == reader->name()) {
ProductionPtr result = make_shared<Production>();
result->push_back(Symbol::enumAdjustSymbol(writer, reader));
result->push_back(Symbol::enumSymbol());
m[make_pair(writer, reader)] = result;
return result;
}
break;
case AVRO_ARRAY: {
// p is the writer-only item production, p2 the resolving one; both
// are handed to the repeater.
ProductionPtr p = getWriterProduction(writer->leafAt(0), m2);
ProductionPtr p2 = doGenerate2(writer->leafAt(0), reader->leafAt(0), m, m2);
ProductionPtr result = make_shared<Production>();
result->push_back(Symbol::arrayEndSymbol());
result->push_back(Symbol::repeater(p2, p, true));
result->push_back(Symbol::arrayStartSymbol());
return result;
}
case AVRO_MAP: {
// Leaf 1 is used as the map's value node. Each entry production is the
// value production (resolving, and writer-only) with a string symbol
// appended for the key.
ProductionPtr pp =
doGenerate2(writer->leafAt(1), reader->leafAt(1), m, m2);
ProductionPtr v(new Production(*pp));
v->push_back(Symbol::stringSymbol());
ProductionPtr pp2 = getWriterProduction(writer->leafAt(1), m2);
ProductionPtr v2(new Production(*pp2));
v2->push_back(Symbol::stringSymbol());
ProductionPtr result = make_shared<Production>();
result->push_back(Symbol::mapEndSymbol());
result->push_back(Symbol::repeater(v, v2, false));
result->push_back(Symbol::mapStartSymbol());
return result;
}
case AVRO_UNION:
return resolveUnion(writer, reader, m, m2);
case AVRO_SYMBOLIC: {
// NOTE(review): writer and reader were passed through resolveSymbol
// above when symbolic, so this case looks unreachable unless
// resolveSymbol can itself return a symbolic node; confirm.
shared_ptr<NodeSymbolic> w2 =
static_pointer_cast<NodeSymbolic>(writer);
shared_ptr<NodeSymbolic> r2 =
static_pointer_cast<NodeSymbolic>(reader);
NodePair p(w2->getNode(), r2->getNode());
auto it = m.find(p);
if (it != m.end() && it->second) {
return it->second;
} else {
m[p] = ProductionPtr();
return make_shared<Production>(1, Symbol::placeholder(p));
}
}
default:
throw Exception("Unknown node type");
}
} else if (writerType == AVRO_UNION) {
// Writer is a union and the reader is some other type.
return resolveUnion(writer, reader, m, m2);
} else {
// Different types: only numeric promotions and a reader union can match.
switch (readerType) {
case AVRO_LONG:
if (writerType == AVRO_INT) {
return make_shared<Production>(1,
Symbol::resolveSymbol(Symbol::Kind::Int, Symbol::Kind::Long));
}
break;
case AVRO_FLOAT:
if (writerType == AVRO_INT || writerType == AVRO_LONG) {
return make_shared<Production>(1,
Symbol::resolveSymbol(writerType == AVRO_INT ? Symbol::Kind::Int : Symbol::Kind::Long, Symbol::Kind::Float));
}
break;
case AVRO_DOUBLE:
if (writerType == AVRO_INT || writerType == AVRO_LONG
|| writerType == AVRO_FLOAT) {
return make_shared<Production>(1,
Symbol::resolveSymbol(writerType == AVRO_INT ? Symbol::Kind::Int : writerType == AVRO_LONG ? Symbol::Kind::Long : Symbol::Kind::Float, Symbol::Kind::Double));
}
break;
case AVRO_UNION: {
// Reader is a union, writer is not: resolve against the best-matching
// reader branch, if there is one.
int j = bestBranch(writer, reader);
if (j >= 0) {
ProductionPtr p = doGenerate2(writer, reader->leafAt(j), m, m2);
ProductionPtr result = make_shared<Production>();
result->push_back(Symbol::unionAdjustSymbol(j, p));
result->push_back(Symbol::unionSymbol());
return result;
}
} break;
case AVRO_NULL:
case AVRO_BOOL:
case AVRO_INT:
case AVRO_STRING:
case AVRO_BYTES:
case AVRO_ENUM:
case AVRO_ARRAY:
case AVRO_MAP:
case AVRO_RECORD:
break;
default:
throw Exception("Unknown node type");
}
}
// No resolution rule applied.
return make_shared<Production>(1, Symbol::error(writer, reader));
}
/**
 * Implicit-action handler used by the resolving parser.
 *
 * It reacts to three grammar symbols:
 *  - WriterUnion:  reads the union branch index from the current decoder.
 *  - DefaultStart: redirects the shared decoder pointer to a private binary
 *                  decoder that reads the symbol's pre-encoded default bytes.
 *  - DefaultEnd:   restores the decoder that was active before DefaultStart.
 * Every other symbol is ignored and yields 0.
 */
class ResolvingDecoderHandler {
    // Default-value bytes currently being decoded; kept alive while inp_ reads them.
    shared_ptr<vector<uint8_t>> defaultData_;
    // Stream over defaultData_ handed to binDecoder.
    unique_ptr<InputStream> inp_;
    // Decoder that was active before a DefaultStart; empty outside a default.
    DecoderPtr backup_;
    // Reference to the owner's decoder pointer; swapped while a default is decoded.
    DecoderPtr &base_;
    // Binary decoder used to read default values.
    const DecoderPtr binDecoder;

public:
    explicit ResolvingDecoderHandler(DecoderPtr &base) : base_(base),
                                                         binDecoder(binaryDecoder()) {}

    /**
     * Performs the implicit action attached to \p s.
     * \return the decoded union index for WriterUnion, 0 otherwise.
     */
    size_t handle(const Symbol &s) {
        switch (s.kind()) {
            case Symbol::Kind::WriterUnion:
                return base_->decodeUnionIndex();
            case Symbol::Kind::DefaultStart:
                defaultData_ = s.extra<shared_ptr<vector<uint8_t>>>();
                backup_ = base_;
                // data() is well-defined for an empty vector, whereas indexing
                // element 0 of an empty vector is undefined behaviour.
                inp_ = memoryInputStream(defaultData_->data(), defaultData_->size());
                base_ = binDecoder;
                base_->init(*inp_);
                return 0;
            case Symbol::Kind::DefaultEnd:
                base_ = backup_;
                backup_.reset();
                return 0;
            default:
                return 0;
        }
    }

    /** Restores the original decoder if a default value was being decoded. */
    void reset() {
        if (backup_ != nullptr) {
            base_ = backup_;
            backup_.reset();
        }
    }
};
// Decoder that checks every call against a grammar generated from a
// (writer, reader) schema pair and forwards the actual reads to base_.
// Member declaration order matters: handler_ is bound to base_, and parser_
// is constructed from both base_ and handler_.
template<typename Parser>
class ResolvingDecoderImpl : public ResolvingDecoder {
// Decoder currently used for reading; handler_ holds a reference to this pointer.
DecoderPtr base_;
// Handles implicit actions raised by the parser.
ResolvingDecoderHandler handler_;
// Grammar-driven parser that validates the sequence of decode calls.
Parser parser_;
void init(InputStream &is) final;
void decodeNull() final;
bool decodeBool() final;
int32_t decodeInt() final;
int64_t decodeLong() final;
float decodeFloat() final;
double decodeDouble() final;
void decodeString(string &value) final;
void skipString() final;
void decodeBytes(vector<uint8_t> &value) final;
void skipBytes() final;
void decodeFixed(size_t n, vector<uint8_t> &value) final;
void skipFixed(size_t n) final;
size_t decodeEnum() final;
size_t arrayStart() final;
size_t arrayNext() final;
size_t skipArray() final;
size_t mapStart() final;
size_t mapNext() final;
size_t skipMap() final;
size_t decodeUnionIndex() final;
const vector<size_t> &fieldOrder() final;
// Runs any pending implicit actions before draining the underlying decoder.
void drain() final {
parser_.processImplicitActions();
base_->drain();
}
public:
// Takes ownership of base and builds the resolving grammar for the schema pair.
// The parser is given a raw pointer to the decoder held by base_ at this point.
ResolvingDecoderImpl(const ValidSchema &writer, const ValidSchema &reader,
DecoderPtr base) : base_(std::move(base)),
handler_(base_),
parser_(ResolvingGrammarGenerator().generate(writer, reader),
&(*base_), handler_) {
}
};
// Re-targets the decoder at a new input stream. The handler is reset first so
// that base_ is the original decoder (not the default-value decoder) when it
// is initialised; the parser state is reset last.
template<typename P>
void ResolvingDecoderImpl<P>::init(InputStream &is) {
handler_.reset();
base_->init(is);
parser_.reset();
}
// Advances the grammar past a Null symbol, then decodes the null via base_.
template<typename P>
void ResolvingDecoderImpl<P>::decodeNull() {
parser_.advance(Symbol::Kind::Null);
base_->decodeNull();
}
// Advances the grammar past a Bool symbol and returns the value decoded by base_.
template<typename P>
bool ResolvingDecoderImpl<P>::decodeBool() {
parser_.advance(Symbol::Kind::Bool);
return base_->decodeBool();
}
// Advances the grammar past an Int symbol and returns the value decoded by base_.
template<typename P>
int32_t ResolvingDecoderImpl<P>::decodeInt() {
parser_.advance(Symbol::Kind::Int);
return base_->decodeInt();
}
/**
 * Decodes a value the reader sees as long. advance() reports the kind to
 * actually read, so an Int on the wire is decoded as int and widened.
 */
template<typename P>
int64_t ResolvingDecoderImpl<P>::decodeLong() {
    const Symbol::Kind wireKind = parser_.advance(Symbol::Kind::Long);
    if (wireKind == Symbol::Kind::Int) {
        return base_->decodeInt();
    }
    return base_->decodeLong();
}
/**
 * Decodes a value the reader sees as float. An Int or Long reported by
 * advance() is read with its own decoder call and converted to float.
 */
template<typename P>
float ResolvingDecoderImpl<P>::decodeFloat() {
    switch (parser_.advance(Symbol::Kind::Float)) {
        case Symbol::Kind::Int:
            return static_cast<float>(base_->decodeInt());
        case Symbol::Kind::Long:
            return static_cast<float>(base_->decodeLong());
        default:
            return base_->decodeFloat();
    }
}
/**
 * Decodes a value the reader sees as double. An Int, Long or Float reported
 * by advance() is read with its own decoder call and converted to double.
 */
template<typename P>
double ResolvingDecoderImpl<P>::decodeDouble() {
    switch (parser_.advance(Symbol::Kind::Double)) {
        case Symbol::Kind::Int:
            return static_cast<double>(base_->decodeInt());
        case Symbol::Kind::Long:
            return static_cast<double>(base_->decodeLong());
        case Symbol::Kind::Float:
            return static_cast<double>(base_->decodeFloat());
        default:
            return base_->decodeDouble();
    }
}
// Advances the grammar past a String symbol, then decodes the string into value.
template<typename P>
void ResolvingDecoderImpl<P>::decodeString(string &value) {
parser_.advance(Symbol::Kind::String);
base_->decodeString(value);
}
// Advances the grammar past a String symbol, then skips the string in base_.
template<typename P>
void ResolvingDecoderImpl<P>::skipString() {
parser_.advance(Symbol::Kind::String);
base_->skipString();
}
// Advances the grammar past a Bytes symbol, then decodes the bytes into value.
template<typename P>
void ResolvingDecoderImpl<P>::decodeBytes(vector<uint8_t> &value) {
parser_.advance(Symbol::Kind::Bytes);
base_->decodeBytes(value);
}
// Advances the grammar past a Bytes symbol, then skips the bytes in base_.
template<typename P>
void ResolvingDecoderImpl<P>::skipBytes() {
parser_.advance(Symbol::Kind::Bytes);
base_->skipBytes();
}
/**
 * Decodes a fixed of \p n bytes into \p value after checking that the grammar
 * expects a Fixed whose recorded size equals \p n.
 */
template<typename P>
void ResolvingDecoderImpl<P>::decodeFixed(size_t n, vector<uint8_t> &value) {
    parser_.advance(Symbol::Kind::Fixed);
    parser_.assertSize(n);
    base_->decodeFixed(n, value);
}
// Advances the grammar past a Fixed symbol, checks the recorded size against n,
// then skips n bytes in base_.
template<typename P>
void ResolvingDecoderImpl<P>::skipFixed(size_t n) {
parser_.advance(Symbol::Kind::Fixed);
parser_.assertSize(n);
base_->skipFixed(n);
}
/**
 * Decodes an enum ordinal from the underlying decoder and maps it through
 * the parser's enum adjustment table.
 */
template<typename P>
size_t ResolvingDecoderImpl<P>::decodeEnum() {
    parser_.advance(Symbol::Kind::Enum);
    const size_t wireOrdinal = base_->decodeEnum();
    const size_t adjusted = parser_.enumAdjust(wireOrdinal);
    return adjusted;
}
/**
 * Begins decoding an array and returns the item count of its first block.
 * A count of zero means the array is empty, so the repeater and the ArrayEnd
 * symbol are consumed immediately.
 */
template<typename P>
size_t ResolvingDecoderImpl<P>::arrayStart() {
    parser_.advance(Symbol::Kind::ArrayStart);
    const size_t count = base_->arrayStart();
    parser_.pushRepeatCount(count);
    if (count != 0) {
        return count;
    }
    parser_.popRepeater();
    parser_.advance(Symbol::Kind::ArrayEnd);
    return count;
}
/**
 * Moves to the next block of the current array and returns its item count.
 * A count of zero ends the array: the repeater and ArrayEnd are consumed.
 */
template<typename P>
size_t ResolvingDecoderImpl<P>::arrayNext() {
    parser_.processImplicitActions();
    const size_t count = base_->arrayNext();
    parser_.nextRepeatCount(count);
    if (count != 0) {
        return count;
    }
    parser_.popRepeater();
    parser_.advance(Symbol::Kind::ArrayEnd);
    return count;
}
/**
 * Skips an entire array. When the underlying decoder reports items that it
 * could not skip itself, the parser skips them one by one through the grammar.
 * Always returns 0.
 */
template<typename P>
size_t ResolvingDecoderImpl<P>::skipArray() {
    parser_.advance(Symbol::Kind::ArrayStart);
    const size_t pending = base_->skipArray();
    if (pending != 0) {
        parser_.pushRepeatCount(pending);
        parser_.skip(*base_);
    } else {
        // Nothing left to skip: discard the symbol on top of the stack.
        parser_.pop();
    }
    parser_.advance(Symbol::Kind::ArrayEnd);
    return 0;
}
/**
 * Begins decoding a map and returns the entry count of its first block.
 * A count of zero means the map is empty, so the repeater and the MapEnd
 * symbol are consumed immediately.
 */
template<typename P>
size_t ResolvingDecoderImpl<P>::mapStart() {
    parser_.advance(Symbol::Kind::MapStart);
    const size_t count = base_->mapStart();
    parser_.pushRepeatCount(count);
    if (count != 0) {
        return count;
    }
    parser_.popRepeater();
    parser_.advance(Symbol::Kind::MapEnd);
    return count;
}
/**
 * Moves to the next block of the current map and returns its entry count.
 * A count of zero ends the map: the repeater and MapEnd are consumed.
 */
template<typename P>
size_t ResolvingDecoderImpl<P>::mapNext() {
    parser_.processImplicitActions();
    const size_t count = base_->mapNext();
    parser_.nextRepeatCount(count);
    if (count != 0) {
        return count;
    }
    parser_.popRepeater();
    parser_.advance(Symbol::Kind::MapEnd);
    return count;
}
/**
 * Skips an entire map. When the underlying decoder reports entries that it
 * could not skip itself, the parser skips them one by one through the grammar.
 * Always returns 0.
 */
template<typename P>
size_t ResolvingDecoderImpl<P>::skipMap() {
    parser_.advance(Symbol::Kind::MapStart);
    const size_t pending = base_->skipMap();
    if (pending != 0) {
        parser_.pushRepeatCount(pending);
        parser_.skip(*base_);
    } else {
        // Nothing left to skip: discard the symbol on top of the stack.
        parser_.pop();
    }
    parser_.advance(Symbol::Kind::MapEnd);
    return 0;
}
// Advances the grammar past a Union symbol and returns the branch index
// supplied by the parser's union adjustment (no direct read from base_ here).
template<typename P>
size_t ResolvingDecoderImpl<P>::decodeUnionIndex() {
parser_.advance(Symbol::Kind::Union);
return parser_.unionAdjust();
}
// Advances the grammar past a Record symbol and returns the size list stored
// in the symbol that is then on top of the parser stack.
template<typename P>
const vector<size_t> &ResolvingDecoderImpl<P>::fieldOrder() {
parser_.advance(Symbol::Kind::Record);
return parser_.sizeList();
}
} // namespace parsing
/**
 * Creates a resolving decoder that reads data written with \p writer and
 * presents it according to \p reader, pulling bytes from \p base.
 */
ResolvingDecoderPtr resolvingDecoder(const ValidSchema &writer,
                                     const ValidSchema &reader, const DecoderPtr &base) {
    using Handler = parsing::ResolvingDecoderHandler;
    using Impl = parsing::ResolvingDecoderImpl<parsing::SimpleParser<Handler>>;
    return make_shared<Impl>(writer, reader, base);
}
} // namespace avro

View File

@ -1,108 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Symbol.hh"
namespace avro {
namespace parsing {
using std::ostringstream;
using std::string;
using std::vector;
// Printable names for Symbol::Kind, indexed by the enumerator's integer value
// (see Symbol::toString). The entries must stay in the same order as the
// enumerators of Symbol::Kind.
const char *Symbol::stringValues[] = {
"TerminalLow",
"Null",
"Bool",
"Int",
"Long",
"Float",
"Double",
"String",
"Bytes",
"ArrayStart",
"ArrayEnd",
"MapStart",
"MapEnd",
"Fixed",
"Enum",
"Union",
"TerminalHigh",
"SizeCheck",
"NameList",
"Root",
"Repeater",
"Alternative",
"Placeholder",
"Indirect",
"Symbolic",
"EnumAdjust",
"UnionAdjust",
"SkipStart",
"Resolve",
"ImplicitActionLow",
"RecordStart",
"RecordEnd",
"Field",
"Record",
"SizeList",
"WriterUnion",
"DefaultStart",
"DefaultEnd",
"ImplicitActionHigh",
"Error"};
/**
 * Builds the EnumAdjust symbol that maps writer enum ordinals to reader ones.
 *
 * The symbol's payload is a pair (adj, err):
 *  - adj[i] >= 0 is the reader ordinal of the writer's i-th name;
 *  - adj[i] <  0 means the name is unknown to the reader, and
 *    err[-adj[i] - 1] holds that name for error reporting.
 */
Symbol Symbol::enumAdjustSymbol(const NodePtr &writer, const NodePtr &reader) {
    const size_t rc = reader->names();
    vector<string> rs;
    rs.reserve(rc);
    for (size_t i = 0; i < rc; ++i) {
        rs.push_back(reader->nameAt(i));
    }
    const size_t wc = writer->names();
    vector<int> adj;
    adj.reserve(wc);
    vector<string> err;
    for (size_t i = 0; i < wc; ++i) {
        const string &s = writer->nameAt(i);
        const auto it = find(rs.begin(), rs.end(), s);
        if (it == rs.end()) {
            err.push_back(s);
            // Encode the 1-based position in err as a negative number. The
            // negation is done on a signed int rather than on size_t.
            adj.push_back(-static_cast<int>(err.size()));
        } else {
            adj.push_back(static_cast<int>(it - rs.begin()));
        }
    }
    return Symbol(Kind::EnumAdjust, make_pair(adj, err));
}
/**
 * Builds an Error symbol whose payload is a message containing the JSON of
 * the writer and reader schemas that could not be resolved.
 */
Symbol Symbol::error(const NodePtr &writer, const NodePtr &reader) {
    ostringstream message;
    message << "Cannot resolve: " << '\n';
    writer->printJson(message, 0);
    message << '\n';
    message << "with" << '\n';
    reader->printJson(message, 0);
    return Symbol(Kind::Error, message.str());
}
} // namespace parsing
} // namespace avro

View File

@ -1,807 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_parsing_Symbol_hh__
#define avro_parsing_Symbol_hh__
#include <map>
#include <set>
#include <sstream>
#include <stack>
#include <utility>
#include <vector>
#include <boost/any.hpp>
#include <boost/tuple/tuple.hpp>
#include "Decoder.hh"
#include "Exception.hh"
#include "Node.hh"
namespace avro {
namespace parsing {
class Symbol;
typedef std::vector<Symbol> Production;
typedef std::shared_ptr<Production> ProductionPtr;
typedef boost::tuple<std::stack<ssize_t>, bool, ProductionPtr, ProductionPtr> RepeaterInfo;
typedef boost::tuple<ProductionPtr, ProductionPtr> RootInfo;
// A grammar symbol: a Kind tag plus an optional, kind-specific payload held in
// a boost::any. Instances are created through the static factory functions.
class Symbol {
public:
// Enumerators strictly between TerminalLow and TerminalHigh are terminals;
// those strictly between ImplicitActionLow and ImplicitActionHigh are
// implicit actions (see isTerminal / isImplicitAction). The order must match
// Symbol::stringValues.
enum class Kind {
TerminalLow, // extra has nothing
Null,
Bool,
Int,
Long,
Float,
Double,
String,
Bytes,
ArrayStart,
ArrayEnd,
MapStart,
MapEnd,
Fixed,
Enum,
Union,
TerminalHigh,
SizeCheck, // Extra has size
NameList, // Extra has a vector<string>
Root, // Root for a schema, extra is Symbol
Repeater, // Array or Map, extra is symbol
Alternative, // One of many (union), extra is Union
Placeholder, // To be fixed up later.
Indirect, // extra is shared_ptr<Production>
Symbolic, // extra is weak_ptr<Production>
EnumAdjust,
UnionAdjust,
SkipStart,
Resolve,
ImplicitActionLow,
RecordStart,
RecordEnd,
Field, // extra is string
Record,
SizeList,
WriterUnion,
DefaultStart, // extra has default value in Avro binary encoding
DefaultEnd,
ImplicitActionHigh,
Error
};
private:
Kind kind_;
// Kind-specific payload; empty for symbols built with the one-argument constructor.
boost::any extra_;
explicit Symbol(Kind k) : kind_(k) {}
template<typename T>
Symbol(Kind k, T t) : kind_(k), extra_(t) {}
public:
Kind kind() const {
return kind_;
}
// Returns a copy of the payload as T (boost::any_cast on the value).
template<typename T>
T extra() const {
return boost::any_cast<T>(extra_);
}
// Returns a pointer to the payload as T (boost::any_cast on the address).
template<typename T>
T *extrap() {
return boost::any_cast<T>(&extra_);
}
template<typename T>
const T *extrap() const {
return boost::any_cast<T>(&extra_);
}
// Replaces the payload.
template<typename T>
void extra(const T &t) {
extra_ = t;
}
bool isTerminal() const {
return kind_ > Kind::TerminalLow && kind_ < Kind::TerminalHigh;
}
bool isImplicitAction() const {
return kind_ > Kind::ImplicitActionLow && kind_ < Kind::ImplicitActionHigh;
}
static const char *stringValues[];
// Looks up the printable name of k in stringValues (no bounds check).
static const char *toString(Kind k) {
return stringValues[static_cast<size_t>(k)];
}
// Root symbol with s as main production and an empty backup production.
static Symbol rootSymbol(ProductionPtr &s) {
return Symbol(Kind::Root, RootInfo(s, std::make_shared<Production>()));
}
static Symbol rootSymbol(const ProductionPtr &main,
const ProductionPtr &backup) {
return Symbol(Kind::Root, RootInfo(main, backup));
}
// Factories for payload-free terminal symbols.
static Symbol nullSymbol() {
return Symbol(Kind::Null);
}
static Symbol boolSymbol() {
return Symbol(Kind::Bool);
}
static Symbol intSymbol() {
return Symbol(Kind::Int);
}
static Symbol longSymbol() {
return Symbol(Kind::Long);
}
static Symbol floatSymbol() {
return Symbol(Kind::Float);
}
static Symbol doubleSymbol() {
return Symbol(Kind::Double);
}
static Symbol stringSymbol() {
return Symbol(Kind::String);
}
static Symbol bytesSymbol() {
return Symbol(Kind::Bytes);
}
// SizeCheck symbol carrying the expected size s.
static Symbol sizeCheckSymbol(size_t s) {
return Symbol(Kind::SizeCheck, s);
}
static Symbol fixedSymbol() {
return Symbol(Kind::Fixed);
}
static Symbol enumSymbol() {
return Symbol(Kind::Enum);
}
static Symbol arrayStartSymbol() {
return Symbol(Kind::ArrayStart);
}
static Symbol arrayEndSymbol() {
return Symbol(Kind::ArrayEnd);
}
static Symbol mapStartSymbol() {
return Symbol(Kind::MapStart);
}
static Symbol mapEndSymbol() {
return Symbol(Kind::MapEnd);
}
// Repeater that uses the same production for reading and for skipping.
static Symbol repeater(const ProductionPtr &p,
bool isArray) {
return repeater(p, p, isArray);
}
// Repeater with separate read and skip productions; its payload is a
// RepeaterInfo of (empty count stack, isArray, read, skip).
static Symbol repeater(const ProductionPtr &read,
const ProductionPtr &skip,
bool isArray) {
std::stack<ssize_t> s;
return Symbol(Kind::Repeater, RepeaterInfo(s, isArray, read, skip));
}
// DefaultStart symbol carrying the shared byte buffer bb.
static Symbol defaultStartAction(std::shared_ptr<std::vector<uint8_t>> bb) {
return Symbol(Kind::DefaultStart, std::move(bb));
}
static Symbol defaultEndAction() {
return Symbol(Kind::DefaultEnd);
}
// Alternative symbol carrying one production per branch.
static Symbol alternative(
const std::vector<ProductionPtr> &branches) {
return Symbol(Symbol::Kind::Alternative, branches);
}
static Symbol unionSymbol() {
return Symbol(Kind::Union);
}
static Symbol recordStartSymbol() {
return Symbol(Kind::RecordStart);
}
static Symbol recordEndSymbol() {
return Symbol(Kind::RecordEnd);
}
static Symbol fieldSymbol(const std::string &name) {
return Symbol(Kind::Field, name);
}
static Symbol writerUnionAction() {
return Symbol(Kind::WriterUnion);
}
static Symbol nameListSymbol(
const std::vector<std::string> &v) {
return Symbol(Kind::NameList, v);
}
// Placeholder carrying the key n; fixup() later replaces it with a Symbolic symbol.
template<typename T>
static Symbol placeholder(const T &n) {
return Symbol(Kind::Placeholder, n);
}
static Symbol indirect(const ProductionPtr &p) {
return Symbol(Kind::Indirect, p);
}
static Symbol symbolic(const std::weak_ptr<Production> &p) {
return Symbol(Kind::Symbolic, p);
}
static Symbol enumAdjustSymbol(const NodePtr &writer,
const NodePtr &reader);
// UnionAdjust symbol carrying the pair (branch, p).
static Symbol unionAdjustSymbol(size_t branch,
const ProductionPtr &p) {
return Symbol(Kind::UnionAdjust, std::make_pair(branch, p));
}
static Symbol sizeListAction(std::vector<size_t> order) {
return Symbol(Kind::SizeList, std::move(order));
}
static Symbol recordAction() {
return Symbol(Kind::Record);
}
static Symbol error(const NodePtr &writer, const NodePtr &reader);
// Resolve symbol carrying the pair (w, r) of kinds.
static Symbol resolveSymbol(Kind w, Kind r) {
return Symbol(Kind::Resolve, std::make_pair(w, r));
}
static Symbol skipStart() {
return Symbol(Kind::SkipStart);
}
};
/**
* Recursively replaces all placeholders in the production with the
* corresponding values.
*/
template<typename T>
void fixup(const ProductionPtr &p,
const std::map<T, ProductionPtr> &m) {
std::set<ProductionPtr> seen;
for (auto &it : *p) {
fixup(it, m, seen);
}
}
/**
* Recursively replaces all placeholders in the symbol with the values with the
* corresponding values.
*/
template<typename T>
void fixup_internal(const ProductionPtr &p,
const std::map<T, ProductionPtr> &m,
std::set<ProductionPtr> &seen) {
if (seen.find(p) == seen.end()) {
seen.insert(p);
for (auto &it : *p) {
fixup(it, m, seen);
}
}
}
/**
 * Fixes up a single symbol: descends into the productions it refers to and
 * replaces a Placeholder by a Symbolic symbol pointing at the production
 * registered in \p m. Throws if a placeholder has no entry in \p m.
 */
template<typename T>
void fixup(Symbol &s, const std::map<T, ProductionPtr> &m,
           std::set<ProductionPtr> &seen) {
    const Symbol::Kind kind = s.kind();
    if (kind == Symbol::Kind::Indirect) {
        fixup_internal(s.extra<ProductionPtr>(), m, seen);
    } else if (kind == Symbol::Kind::Alternative) {
        const std::vector<ProductionPtr> *branches =
            s.extrap<std::vector<ProductionPtr>>();
        for (const auto &branch : *branches) {
            fixup_internal(branch, m, seen);
        }
    } else if (kind == Symbol::Kind::Repeater) {
        const RepeaterInfo &info = *s.extrap<RepeaterInfo>();
        // Element 2 is the read production, element 3 the skip production.
        fixup_internal(boost::tuples::get<2>(info), m, seen);
        fixup_internal(boost::tuples::get<3>(info), m, seen);
    } else if (kind == Symbol::Kind::Placeholder) {
        const auto found = m.find(s.extra<T>());
        if (found == m.end()) {
            throw Exception("Placeholder symbol cannot be resolved");
        }
        s = Symbol::symbolic(std::weak_ptr<Production>(found->second));
    } else if (kind == Symbol::Kind::UnionAdjust) {
        fixup_internal(s.extrap<std::pair<size_t, ProductionPtr>>()->second,
                       m, seen);
    }
}
template<typename Handler>
class SimpleParser {
Decoder *decoder_;
Handler &handler_;
std::stack<Symbol> parsingStack;
// Throws an Exception saying that the schema requires `expected` but the
// caller attempted `actual`.
static void throwMismatch(Symbol::Kind actual, Symbol::Kind expected) {
    std::ostringstream message;
    message << "Invalid operation. Schema requires: " << Symbol::toString(expected);
    message << ", got: " << Symbol::toString(actual);
    throw Exception(message.str());
}
// Throws via throwMismatch unless the two kinds are equal.
static void assertMatch(Symbol::Kind actual, Symbol::Kind expected) {
    if (expected == actual) {
        return;
    }
    throwMismatch(actual, expected);
}
void append(const ProductionPtr &ss) {
for (Production::const_iterator it = ss->begin();
it != ss->end(); ++it) {
parsingStack.push(*it);
}
}
size_t popSize() {
const Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::SizeCheck, s.kind());
auto result = s.extra<size_t>();
parsingStack.pop();
return result;
}
// Throws unless n is strictly less than the upper bound s.
static void assertLessThan(size_t n, size_t s) {
    if (n < s) {
        return;
    }
    std::ostringstream message;
    message << "Size max value. Upper bound: " << s << " found " << n;
    throw Exception(message.str());
}
public:
/**
 * Advances the grammar until the terminal \p k requested by the caller has
 * been consumed, expanding non-terminals and running implicit actions found
 * on the way.
 *
 * \return \p k, or for a Resolve symbol the first kind of its pair.
 * \throw Exception if the grammar requires a different terminal, an Error
 *        symbol is reached, a repeater has no remaining items, or a symbol
 *        that cannot be handled here is encountered.
 */
Symbol::Kind advance(Symbol::Kind k) {
    for (;;) {
        Symbol &s = parsingStack.top();
        if (s.kind() == k) {
            parsingStack.pop();
            return k;
        } else if (s.isTerminal()) {
            throwMismatch(k, s.kind());
        } else {
            switch (s.kind()) {
                case Symbol::Kind::Root:
                    // The root symbol is not popped; it stays at the bottom
                    // of the stack and is expanded again for the next datum.
                    append(boost::tuples::get<0>(*s.extrap<RootInfo>()));
                    continue;
                case Symbol::Kind::Indirect: {
                    ProductionPtr pp =
                        s.extra<ProductionPtr>();
                    parsingStack.pop();
                    append(pp);
                }
                    continue;
                case Symbol::Kind::Symbolic: {
                    ProductionPtr pp(
                        s.extra<std::weak_ptr<Production>>());
                    parsingStack.pop();
                    append(pp);
                }
                    continue;
                case Symbol::Kind::Repeater: {
                    auto *p = s.extrap<RepeaterInfo>();
                    std::stack<ssize_t> &ns = boost::tuples::get<0>(*p);
                    if (ns.empty()) {
                        throw Exception(
                            "Empty item count stack in repeater advance");
                    }
                    if (ns.top() == 0) {
                        throw Exception(
                            "Zero item count in repeater advance");
                    }
                    // Consume one item and expand the read production.
                    --ns.top();
                    append(boost::tuples::get<2>(*p));
                }
                    continue;
                case Symbol::Kind::Error:
                    throw Exception(s.extra<std::string>());
                case Symbol::Kind::Resolve: {
                    const std::pair<Symbol::Kind, Symbol::Kind> *p =
                        s.extrap<std::pair<Symbol::Kind, Symbol::Kind>>();
                    // Same argument order as every other mismatch check:
                    // what the caller attempted first, what the grammar
                    // requires second, so the message reads correctly.
                    assertMatch(k, p->second);
                    Symbol::Kind result = p->first;
                    parsingStack.pop();
                    return result;
                }
                case Symbol::Kind::SkipStart:
                    parsingStack.pop();
                    skip(*decoder_);
                    break;
                default:
                    if (s.isImplicitAction()) {
                        size_t n = handler_.handle(s);
                        if (s.kind() == Symbol::Kind::WriterUnion) {
                            // n is the branch index returned by the handler.
                            parsingStack.pop();
                            selectBranch(n);
                        } else {
                            parsingStack.pop();
                        }
                    } else {
                        std::ostringstream oss;
                        oss << "Encountered " << Symbol::toString(s.kind())
                            << " while looking for " << Symbol::toString(k);
                        throw Exception(oss.str());
                    }
            }
        }
    }
}
// Skips the symbol currently on top of the parsing stack, together with
// everything it expands to, reading the corresponding data from d.
// The loop runs until the stack is shorter than it was on entry.
// Throws if the stack is empty or a symbol cannot be skipped.
void skip(Decoder &d) {
const size_t sz = parsingStack.size();
if (sz == 0) {
throw Exception("Nothing to skip!");
}
while (parsingStack.size() >= sz) {
Symbol &t = parsingStack.top();
// std::cout << "skip: " << Symbol::toString(t.kind()) << '\n';
// Cases that `break` fall through to the pop at the end of the loop body;
// cases that `continue` have already adjusted the stack themselves.
switch (t.kind()) {
case Symbol::Kind::Null:
d.decodeNull();
break;
case Symbol::Kind::Bool:
d.decodeBool();
break;
case Symbol::Kind::Int:
d.decodeInt();
break;
case Symbol::Kind::Long:
d.decodeLong();
break;
case Symbol::Kind::Float:
d.decodeFloat();
break;
case Symbol::Kind::Double:
d.decodeDouble();
break;
case Symbol::Kind::String:
d.skipString();
break;
case Symbol::Kind::Bytes:
d.skipBytes();
break;
case Symbol::Kind::ArrayStart: {
parsingStack.pop();
size_t n = d.skipArray();
processImplicitActions();
assertMatch(Symbol::Kind::Repeater, parsingStack.top().kind());
// n == 0: nothing left to skip, so the repeater itself is popped below.
if (n == 0) {
break;
}
// Otherwise record the pending item count on the repeater and let the
// Repeater case skip the items one by one.
Symbol &t2 = parsingStack.top();
auto *p = t2.extrap<RepeaterInfo>();
boost::tuples::get<0>(*p).push(n);
continue;
}
case Symbol::Kind::ArrayEnd:
break;
case Symbol::Kind::MapStart: {
parsingStack.pop();
size_t n = d.skipMap();
processImplicitActions();
assertMatch(Symbol::Kind::Repeater, parsingStack.top().kind());
// Same handling as for arrays.
if (n == 0) {
break;
}
Symbol &t2 = parsingStack.top();
auto *p2 = t2.extrap<RepeaterInfo>();
boost::tuples::get<0>(*p2).push(n);
continue;
}
case Symbol::Kind::MapEnd:
break;
case Symbol::Kind::Fixed: {
parsingStack.pop();
// The symbol below Fixed carries the size as a size_t payload; it is
// popped at the end of the loop body.
Symbol &t2 = parsingStack.top();
d.decodeFixed(t2.extra<size_t>());
} break;
case Symbol::Kind::Enum:
// Pops Enum here; the symbol below it is popped at the end of the loop body.
parsingStack.pop();
d.decodeEnum();
break;
case Symbol::Kind::Union: {
parsingStack.pop();
size_t n = d.decodeUnionIndex();
selectBranch(n);
continue;
}
case Symbol::Kind::Repeater: {
auto *p = t.extrap<RepeaterInfo>();
std::stack<ssize_t> &ns = boost::tuples::get<0>(*p);
if (ns.empty()) {
throw Exception(
"Empty item count stack in repeater skip");
}
ssize_t &n = ns.top();
// Current block exhausted: ask the decoder for the next block's count.
if (n == 0) {
n = boost::tuples::get<1>(*p) ? d.arrayNext()
: d.mapNext();
}
if (n != 0) {
// Skip one more item using the skip production (tuple element 3).
--n;
append(boost::tuples::get<3>(*p));
continue;
} else {
ns.pop();
}
break;
}
case Symbol::Kind::Indirect: {
ProductionPtr pp =
t.extra<ProductionPtr>();
parsingStack.pop();
append(pp);
}
continue;
case Symbol::Kind::Symbolic: {
ProductionPtr pp(
t.extra<std::weak_ptr<Production>>());
parsingStack.pop();
append(pp);
}
continue;
default: {
std::ostringstream oss;
oss << "Don't know how to skip "
<< Symbol::toString(t.kind());
throw Exception(oss.str());
}
}
parsingStack.pop();
}
}
// Pops the SizeCheck symbol and throws unless the size it carries equals n.
void assertSize(size_t n) {
    const size_t expected = popSize();
    if (expected == n) {
        return;
    }
    std::ostringstream message;
    message << "Incorrect size. Expected: " << expected << " found " << n;
    throw Exception(message.str());
}
// Pops the SizeCheck symbol and throws unless n is below the size it carries.
void assertLessThanSize(size_t n) {
assertLessThan(n, popSize());
}
size_t enumAdjust(size_t n) {
const Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::EnumAdjust, s.kind());
const auto *v = s.extrap<std::pair<std::vector<int>, std::vector<std::string>>>();
assertLessThan(n, v->first.size());
int result = v->first[n];
if (result < 0) {
std::ostringstream oss;
oss << "Cannot resolve symbol: " << v->second[-result - 1]
<< std::endl;
throw Exception(oss.str());
}
parsingStack.pop();
return result;
}
size_t unionAdjust() {
const Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::UnionAdjust, s.kind());
std::pair<size_t, ProductionPtr> p =
s.extra<std::pair<size_t, ProductionPtr>>();
parsingStack.pop();
append(p.second);
return p.first;
}
std::string nameForIndex(size_t e) {
const Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::NameList, s.kind());
const std::vector<std::string> names =
s.extra<std::vector<std::string>>();
if (e >= names.size()) {
throw Exception("Not that many names");
}
std::string result = names[e];
parsingStack.pop();
return result;
}
size_t indexForName(const std::string &name) {
const Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::NameList, s.kind());
const std::vector<std::string> names =
s.extra<std::vector<std::string>>();
auto it = std::find(names.begin(), names.end(), name);
if (it == names.end()) {
throw Exception("No such enum symbol");
}
size_t result = it - names.begin();
parsingStack.pop();
return result;
}
void pushRepeatCount(size_t n) {
processImplicitActions();
Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::Repeater, s.kind());
auto *p = s.extrap<RepeaterInfo>();
std::stack<ssize_t> &nn = boost::tuples::get<0>(*p);
nn.push(n);
}
void nextRepeatCount(size_t n) {
processImplicitActions();
Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::Repeater, s.kind());
auto *p = s.extrap<RepeaterInfo>();
std::stack<ssize_t> &nn = boost::tuples::get<0>(*p);
if (nn.empty() || nn.top() != 0) {
throw Exception("Wrong number of items");
}
nn.top() = n;
}
void popRepeater() {
processImplicitActions();
Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::Repeater, s.kind());
auto *p = s.extrap<RepeaterInfo>();
std::stack<ssize_t> &ns = boost::tuples::get<0>(*p);
if (ns.empty()) {
throw Exception("Incorrect number of items (empty)");
}
if (ns.top() > 0) {
throw Exception("Incorrect number of items (non-zero)");
}
ns.pop();
parsingStack.pop();
}
void selectBranch(size_t n) {
const Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::Alternative, s.kind());
std::vector<ProductionPtr> v =
s.extra<std::vector<ProductionPtr>>();
if (n >= v.size()) {
throw Exception("Not that many branches");
}
parsingStack.pop();
append(v[n]);
}
// Returns a reference to the size_t list held by the SizeList symbol on top
// of the stack. The symbol is not popped here, so the reference is valid only
// while that symbol stays on the stack.
const std::vector<size_t> &sizeList() {
const Symbol &s = parsingStack.top();
assertMatch(Symbol::Kind::SizeList, s.kind());
return *s.extrap<std::vector<size_t>>();
}
// Returns the kind of the symbol on top of the parsing stack.
Symbol::Kind top() const {
return parsingStack.top().kind();
}
// Discards the symbol on top of the parsing stack without inspecting it.
void pop() {
parsingStack.pop();
}
void processImplicitActions() {
for (;;) {
Symbol &s = parsingStack.top();
if (s.isImplicitAction()) {
handler_.handle(s);
parsingStack.pop();
} else if (s.kind() == Symbol::Kind::SkipStart) {
parsingStack.pop();
skip(*decoder_);
} else {
break;
}
}
}
// Creates a parser whose stack initially holds only s. d is the decoder used
// when a SkipStart symbol is processed; h receives implicit actions.
SimpleParser(const Symbol &s, Decoder *d, Handler &h) : decoder_(d), handler_(h) {
parsingStack.push(s);
}
// Discards everything above the bottom-most symbol of the parsing stack.
void reset() {
    for (size_t depth = parsingStack.size(); depth > 1; --depth) {
        parsingStack.pop();
    }
}
};
inline std::ostream &operator<<(std::ostream &os, const Symbol &s);
inline std::ostream &operator<<(std::ostream &os, const Production &p) {
os << '(';
for (const auto &it : p) {
os << it << ", ";
}
os << ')';
return os;
}
inline std::ostream &operator<<(std::ostream &os, const Symbol &s) {
switch (s.kind()) {
case Symbol::Kind::Repeater: {
const RepeaterInfo &ri = *s.extrap<RepeaterInfo>();
os << '(' << Symbol::toString(s.kind())
<< ' ' << *boost::tuples::get<2>(ri)
<< ' ' << *boost::tuples::get<3>(ri)
<< ')';
} break;
case Symbol::Kind::Indirect: {
os << '(' << Symbol::toString(s.kind()) << ' '
<< *s.extra<std::shared_ptr<Production>>() << ')';
} break;
case Symbol::Kind::Alternative: {
os << '(' << Symbol::toString(s.kind());
for (const auto &it : *s.extrap<std::vector<ProductionPtr>>()) {
os << ' ' << *it;
}
os << ')';
} break;
case Symbol::Kind::Symbolic: {
os << '(' << Symbol::toString(s.kind())
<< ' ' << s.extra<std::weak_ptr<Production>>().lock()
<< ')';
} break;
default:
os << Symbol::toString(s.kind());
break;
}
return os;
}
} // namespace parsing
} // namespace avro
#endif

View File

@ -1,534 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ValidatingCodec.hh"
#include <algorithm>
#include <boost/any.hpp>
#include <map>
#include <memory>
#include <utility>
#include "Decoder.hh"
#include "Encoder.hh"
#include "NodeImpl.hh"
#include "ValidSchema.hh"
namespace avro {
using std::make_shared;
namespace parsing {
using std::shared_ptr;
using std::static_pointer_cast;
using std::map;
using std::ostringstream;
using std::pair;
using std::reverse;
using std::string;
using std::vector;
/** Follows the design of Avro Parser in Java. */
// Generates the production for node n, then resolves the placeholders left
// behind during generation using the node-to-production map.
ProductionPtr ValidatingGrammarGenerator::generate(const NodePtr &n) {
    map<NodePtr, ProductionPtr> productions;
    ProductionPtr grammar = doGenerate(n, productions);
    fixup(grammar, productions);
    return grammar;
}
// Generates the grammar for the schema's root node and wraps it in a Root symbol.
Symbol ValidatingGrammarGenerator::generate(const ValidSchema &schema) {
// rootSymbol takes a non-const reference, hence the named local.
ProductionPtr r = generate(schema.root());
return Symbol::rootSymbol(r);
}
/**
 * Recursively builds the production for node \p n.
 *
 * Productions are stored in reverse order of consumption, because the parser
 * pushes them onto a stack front to back. Productions of fixed, record and
 * enum nodes are registered in \p m; a symbolic node whose target has no
 * production yet yields a placeholder.
 *
 * \throw Exception for a node type that is not handled.
 */
ProductionPtr ValidatingGrammarGenerator::doGenerate(const NodePtr &n,
                                                     map<NodePtr, ProductionPtr> &m) {
    // Wraps one symbol into a production of its own.
    const auto single = [](const Symbol &sym) {
        return make_shared<Production>(1, sym);
    };
    switch (n->type()) {
        case AVRO_NULL:
            return single(Symbol::nullSymbol());
        case AVRO_BOOL:
            return single(Symbol::boolSymbol());
        case AVRO_INT:
            return single(Symbol::intSymbol());
        case AVRO_LONG:
            return single(Symbol::longSymbol());
        case AVRO_FLOAT:
            return single(Symbol::floatSymbol());
        case AVRO_DOUBLE:
            return single(Symbol::doubleSymbol());
        case AVRO_STRING:
            return single(Symbol::stringSymbol());
        case AVRO_BYTES:
            return single(Symbol::bytesSymbol());
        case AVRO_FIXED: {
            ProductionPtr fixedProd = make_shared<Production>();
            fixedProd->push_back(Symbol::sizeCheckSymbol(n->fixedSize()));
            fixedProd->push_back(Symbol::fixedSymbol());
            m[n] = fixedProd;
            return fixedProd;
        }
        case AVRO_RECORD: {
            ProductionPtr fields = make_shared<Production>();
            m.erase(n);
            const size_t fieldCount = n->leaves();
            for (size_t idx = 0; idx < fieldCount; ++idx) {
                const NodePtr &field = n->leafAt(idx);
                ProductionPtr fieldProd = doGenerate(field, m);
                // Append each field reversed; the whole list is reversed once
                // more after the loop.
                fields->insert(fields->end(), fieldProd->rbegin(), fieldProd->rend());
            }
            reverse(fields->begin(), fields->end());
            m[n] = fields;
            return single(Symbol::indirect(fields));
        }
        case AVRO_ENUM: {
            ProductionPtr enumProd = make_shared<Production>();
            enumProd->push_back(Symbol::sizeCheckSymbol(n->names()));
            enumProd->push_back(Symbol::enumSymbol());
            m[n] = enumProd;
            return enumProd;
        }
        case AVRO_ARRAY: {
            ProductionPtr arrayProd = make_shared<Production>();
            arrayProd->push_back(Symbol::arrayEndSymbol());
            ProductionPtr items = doGenerate(n->leafAt(0), m);
            arrayProd->push_back(Symbol::repeater(items, true));
            arrayProd->push_back(Symbol::arrayStartSymbol());
            return arrayProd;
        }
        case AVRO_MAP: {
            ProductionPtr values = doGenerate(n->leafAt(1), m);
            // Each entry is the value production plus a String symbol for the
            // key; work on a copy so the value production is left untouched.
            ProductionPtr entry = make_shared<Production>(*values);
            entry->push_back(Symbol::stringSymbol());
            ProductionPtr mapProd = make_shared<Production>();
            mapProd->push_back(Symbol::mapEndSymbol());
            mapProd->push_back(Symbol::repeater(entry, false));
            mapProd->push_back(Symbol::mapStartSymbol());
            return mapProd;
        }
        case AVRO_UNION: {
            const size_t branchCount = n->leaves();
            vector<ProductionPtr> branches;
            branches.reserve(branchCount);
            for (size_t idx = 0; idx < branchCount; ++idx) {
                branches.push_back(doGenerate(n->leafAt(idx), m));
            }
            ProductionPtr unionProd = make_shared<Production>();
            unionProd->push_back(Symbol::alternative(branches));
            unionProd->push_back(Symbol::unionSymbol());
            return unionProd;
        }
        case AVRO_SYMBOLIC: {
            shared_ptr<NodeSymbolic> symbolic = static_pointer_cast<NodeSymbolic>(n);
            NodePtr target = symbolic->getNode();
            auto known = m.find(target);
            if (known != m.end() && known->second) {
                return known->second;
            }
            // Not generated yet: register an empty entry and leave a
            // placeholder behind.
            m[target] = ProductionPtr();
            return single(Symbol::placeholder(target));
        }
        default:
            throw Exception("Unknown node type");
    }
}
// Handler that ignores every symbol passed to it and always returns 0.
struct DummyHandler {
static size_t handle(const Symbol &s) {
return 0;
}
};
// Decoder that checks each call against a grammar generated from a schema
// before forwarding it to the wrapped decoder `base`.
// Member declaration order matters: parser is constructed from handler_.
template<typename P>
class ValidatingDecoder : public Decoder {
// Wrapped decoder that performs the actual reads.
const shared_ptr<Decoder> base;
DummyHandler handler_;
// Grammar-driven parser that validates the sequence of decode calls.
P parser;
void init(InputStream &is) final;
void decodeNull() final;
bool decodeBool() final;
int32_t decodeInt() final;
int64_t decodeLong() final;
float decodeFloat() final;
double decodeDouble() final;
void decodeString(string &value) final;
void skipString() final;
void decodeBytes(vector<uint8_t> &value) final;
void skipBytes() final;
void decodeFixed(size_t n, vector<uint8_t> &value) final;
void skipFixed(size_t n) final;
size_t decodeEnum() final;
size_t arrayStart() final;
size_t arrayNext() final;
size_t skipArray() final;
size_t mapStart() final;
size_t mapNext() final;
size_t skipMap() final;
size_t decodeUnionIndex() final;
void drain() final {
base->drain();
}
public:
// Builds the validating grammar for s. The parser is constructed with a null
// decoder pointer.
ValidatingDecoder(const ValidSchema &s, const shared_ptr<Decoder> &b) : base(b),
parser(ValidatingGrammarGenerator().generate(s), NULL, handler_) {}
};
// Forwards stream initialisation to the wrapped decoder. The grammar parser
// is not touched here.
template<typename P>
void ValidatingDecoder<P>::init(InputStream &is) {
base->init(is);
}
// Advances the parser over a Null symbol, then lets the wrapped decoder
// consume the null.
// NOTE(review): advance() is assumed to reject a symbol the grammar does not
// expect at this point — confirm in the parser type P.
template<typename P>
void ValidatingDecoder<P>::decodeNull() {
parser.advance(Symbol::Kind::Null);
base->decodeNull();
}
// Advances the parser over a Bool symbol before reading; returns whatever
// the wrapped decoder's decodeBool() returns.
template<typename P>
bool ValidatingDecoder<P>::decodeBool() {
parser.advance(Symbol::Kind::Bool);
return base->decodeBool();
}
// Advances the parser over an Int symbol before reading; returns whatever
// the wrapped decoder's decodeInt() returns.
template<typename P>
int32_t ValidatingDecoder<P>::decodeInt() {
parser.advance(Symbol::Kind::Int);
return base->decodeInt();
}
// Advances the parser over a Long symbol before reading; returns whatever
// the wrapped decoder's decodeLong() returns.
template<typename P>
int64_t ValidatingDecoder<P>::decodeLong() {
parser.advance(Symbol::Kind::Long);
return base->decodeLong();
}
// Advances the parser over a Float symbol before reading; returns whatever
// the wrapped decoder's decodeFloat() returns.
template<typename P>
float ValidatingDecoder<P>::decodeFloat() {
parser.advance(Symbol::Kind::Float);
return base->decodeFloat();
}
// Advances the parser over a Double symbol before reading; returns whatever
// the wrapped decoder's decodeDouble() returns.
template<typename P>
double ValidatingDecoder<P>::decodeDouble() {
parser.advance(Symbol::Kind::Double);
return base->decodeDouble();
}
// Advances the parser over a String symbol, then has the wrapped decoder
// read the string into `value`.
template<typename P>
void ValidatingDecoder<P>::decodeString(string &value) {
parser.advance(Symbol::Kind::String);
base->decodeString(value);
}
// Same parser step as decodeString() (a String symbol), but the wrapped
// decoder is asked to skip the value instead of reading it.
template<typename P>
void ValidatingDecoder<P>::skipString() {
parser.advance(Symbol::Kind::String);
base->skipString();
}
// Advances the parser over a Bytes symbol, then has the wrapped decoder
// read the bytes into `value`.
template<typename P>
void ValidatingDecoder<P>::decodeBytes(vector<uint8_t> &value) {
parser.advance(Symbol::Kind::Bytes);
base->decodeBytes(value);
}
// Same parser step as decodeBytes() (a Bytes symbol), but the wrapped
// decoder is asked to skip the value instead of reading it.
template<typename P>
void ValidatingDecoder<P>::skipBytes() {
parser.advance(Symbol::Kind::Bytes);
base->skipBytes();
}
// Advances the parser over a Fixed symbol and hands the caller-supplied
// length `n` to parser.assertSize() before the wrapped decoder reads `n`
// bytes into `value`.
// NOTE(review): assertSize() presumably compares `n` with the size declared
// in the schema — confirm in the parser type P.
template<typename P>
void ValidatingDecoder<P>::decodeFixed(size_t n, vector<uint8_t> &value) {
parser.advance(Symbol::Kind::Fixed);
parser.assertSize(n);
base->decodeFixed(n, value);
}
// Same parser steps as decodeFixed() (advance over Fixed, then
// assertSize(n)), but the wrapped decoder skips the `n` bytes instead of
// reading them.
template<typename P>
void ValidatingDecoder<P>::skipFixed(size_t n) {
parser.advance(Symbol::Kind::Fixed);
parser.assertSize(n);
base->skipFixed(n);
}
// Reads an enum ordinal through the wrapped decoder.
//
// The parser is advanced over an Enum symbol first; the decoded ordinal is
// then passed to parser.assertLessThanSize() before being returned.
// NOTE(review): assertLessThanSize() presumably rejects ordinals outside the
// enum's symbol list — confirm in the parser type P.
template<typename P>
size_t ValidatingDecoder<P>::decodeEnum() {
    parser.advance(Symbol::Kind::Enum);
    size_t ordinal = base->decodeEnum();
    parser.assertLessThanSize(ordinal);
    return ordinal;
}
// Begins reading an array and returns the item count reported by the
// wrapped decoder's arrayStart().
//
// The count is pushed onto the parser as a repeat count. When it is zero the
// repeater is popped immediately and the parser is advanced over ArrayEnd,
// so the caller does not need to call arrayNext().
template<typename P>
size_t ValidatingDecoder<P>::arrayStart() {
    parser.advance(Symbol::Kind::ArrayStart);
    size_t itemCount = base->arrayStart();
    parser.pushRepeatCount(itemCount);
    if (itemCount != 0) {
        return itemCount;
    }
    // Empty array: close it on the parser side right away.
    parser.popRepeater();
    parser.advance(Symbol::Kind::ArrayEnd);
    return itemCount;
}
// Continues reading an array and returns the item count reported by the
// wrapped decoder's arrayNext().
//
// The count is handed to parser.nextRepeatCount(). A zero count ends the
// array: the repeater is popped and the parser is advanced over ArrayEnd.
template<typename P>
size_t ValidatingDecoder<P>::arrayNext() {
    size_t itemCount = base->arrayNext();
    parser.nextRepeatCount(itemCount);
    if (itemCount != 0) {
        return itemCount;
    }
    // No more items: close the array on the parser side.
    parser.popRepeater();
    parser.advance(Symbol::Kind::ArrayEnd);
    return itemCount;
}
// Skips a whole array and always returns 0.
//
// After advancing over ArrayStart, the wrapped decoder's skipArray() result
// decides the parser bookkeeping:
//   - non-zero: the value is pushed as a repeat count and parser.skip() is
//     run against the wrapped decoder;
//   - zero: a single parser.pop() is performed instead.
// Either way the parser is then advanced over ArrayEnd.
// NOTE(review): pop() presumably discards the repeater symbol and skip()
// presumably consumes the remaining items — confirm in the parser type P.
template<typename P>
size_t ValidatingDecoder<P>::skipArray() {
    parser.advance(Symbol::Kind::ArrayStart);
    size_t remaining = base->skipArray();
    if (remaining != 0) {
        parser.pushRepeatCount(remaining);
        parser.skip(*base);
    } else {
        parser.pop();
    }
    parser.advance(Symbol::Kind::ArrayEnd);
    return 0;
}
// Begins reading a map and returns the entry count reported by the wrapped
// decoder's mapStart().
//
// The count is pushed onto the parser as a repeat count. When it is zero the
// repeater is popped immediately and the parser is advanced over MapEnd, so
// the caller does not need to call mapNext().
template<typename P>
size_t ValidatingDecoder<P>::mapStart() {
    parser.advance(Symbol::Kind::MapStart);
    size_t entryCount = base->mapStart();
    parser.pushRepeatCount(entryCount);
    if (entryCount != 0) {
        return entryCount;
    }
    // Empty map: close it on the parser side right away.
    parser.popRepeater();
    parser.advance(Symbol::Kind::MapEnd);
    return entryCount;
}
// Continues reading a map and returns the entry count reported by the
// wrapped decoder's mapNext().
//
// The count is handed to parser.nextRepeatCount(). A zero count ends the
// map: the repeater is popped and the parser is advanced over MapEnd.
template<typename P>
size_t ValidatingDecoder<P>::mapNext() {
    size_t entryCount = base->mapNext();
    parser.nextRepeatCount(entryCount);
    if (entryCount != 0) {
        return entryCount;
    }
    // No more entries: close the map on the parser side.
    parser.popRepeater();
    parser.advance(Symbol::Kind::MapEnd);
    return entryCount;
}
// Skips a whole map and always returns 0.
//
// After advancing over MapStart, the wrapped decoder's skipMap() result
// decides the parser bookkeeping:
//   - non-zero: the value is pushed as a repeat count and parser.skip() is
//     run against the wrapped decoder;
//   - zero: a single parser.pop() is performed instead.
// Either way the parser is then advanced over MapEnd.
// NOTE(review): pop() presumably discards the repeater symbol and skip()
// presumably consumes the remaining entries — confirm in the parser type P.
template<typename P>
size_t ValidatingDecoder<P>::skipMap() {
    parser.advance(Symbol::Kind::MapStart);
    size_t remaining = base->skipMap();
    if (remaining != 0) {
        parser.pushRepeatCount(remaining);
        parser.skip(*base);
    } else {
        parser.pop();
    }
    parser.advance(Symbol::Kind::MapEnd);
    return 0;
}
// Reads a union branch index through the wrapped decoder and returns it.
//
// The parser is advanced over a Union symbol first; once the index is known
// it is passed to parser.selectBranch() so that subsequent calls are checked
// against the chosen branch.
template<typename P>
size_t ValidatingDecoder<P>::decodeUnionIndex() {
    parser.advance(Symbol::Kind::Union);
    size_t branch = base->decodeUnionIndex();
    parser.selectBranch(branch);
    return branch;
}
/**
 * Encoder that wraps another encoder and drives a grammar parser of type P
 * in step with every call. Each encode operation first tells the parser
 * which symbol is being written and then forwards the operation to the
 * wrapped encoder.
 *
 * The parser is built from the grammar that ValidatingGrammarGenerator
 * produces for the supplied schema.
 */
template<typename P>
class ValidatingEncoder : public Encoder {
    // Passed to the parser's constructor. Members are initialised in
    // declaration order, so this must stay declared before `parser_`.
    DummyHandler handler_;
    // Grammar parser advanced alongside every encode call.
    P parser_;
    // Wrapped encoder that performs the actual writes.
    EncoderPtr base_;

    void init(OutputStream &os) final;
    void flush() final;
    int64_t byteCount() const final;
    void encodeNull() final;
    void encodeBool(bool b) final;
    void encodeInt(int32_t i) final;
    void encodeLong(int64_t l) final;
    void encodeFloat(float f) final;
    void encodeDouble(double d) final;
    void encodeString(const std::string &s) final;
    void encodeBytes(const uint8_t *bytes, size_t len) final;
    void encodeFixed(const uint8_t *bytes, size_t len) final;
    void encodeEnum(size_t e) final;
    void arrayStart() final;
    void arrayEnd() final;
    void mapStart() final;
    void mapEnd() final;
    void setItemCount(size_t count) final;
    void startItem() final;
    void encodeUnionIndex(size_t e) final;

public:
    /**
     * @param schema schema whose grammar the parser will follow.
     * @param base   encoder that every operation is forwarded to; taken by
     *               value and moved into the member.
     *
     * The parser's second constructor argument is a null pointer; nullptr is
     * used instead of the NULL macro so the argument can only ever bind to a
     * pointer parameter.
     */
    ValidatingEncoder(const ValidSchema &schema, EncoderPtr base) : parser_(ValidatingGrammarGenerator().generate(schema), nullptr, handler_),
                                                                    base_(std::move(base)) {}
};
// Forwards stream initialisation to the wrapped encoder. The grammar parser
// is not touched here.
template<typename P>
void ValidatingEncoder<P>::init(OutputStream &os) {
base_->init(os);
}
// Forwards flush() to the wrapped encoder; no grammar state is involved.
template<typename P>
void ValidatingEncoder<P>::flush() {
base_->flush();
}
// Advances the parser over a Null symbol, then lets the wrapped encoder
// write the null.
// NOTE(review): advance() is assumed to reject a symbol the grammar does not
// expect at this point — confirm in the parser type P.
template<typename P>
void ValidatingEncoder<P>::encodeNull() {
parser_.advance(Symbol::Kind::Null);
base_->encodeNull();
}
// Advances the parser over a Bool symbol, then forwards `b` to the wrapped
// encoder.
template<typename P>
void ValidatingEncoder<P>::encodeBool(bool b) {
parser_.advance(Symbol::Kind::Bool);
base_->encodeBool(b);
}
// Advances the parser over an Int symbol, then forwards `i` to the wrapped
// encoder.
template<typename P>
void ValidatingEncoder<P>::encodeInt(int32_t i) {
parser_.advance(Symbol::Kind::Int);
base_->encodeInt(i);
}
// Advances the parser over a Long symbol, then forwards `l` to the wrapped
// encoder.
template<typename P>
void ValidatingEncoder<P>::encodeLong(int64_t l) {
parser_.advance(Symbol::Kind::Long);
base_->encodeLong(l);
}
// Advances the parser over a Float symbol, then forwards `f` to the wrapped
// encoder.
template<typename P>
void ValidatingEncoder<P>::encodeFloat(float f) {
parser_.advance(Symbol::Kind::Float);
base_->encodeFloat(f);
}
// Advances the parser over a Double symbol, then forwards `d` to the wrapped
// encoder.
template<typename P>
void ValidatingEncoder<P>::encodeDouble(double d) {
parser_.advance(Symbol::Kind::Double);
base_->encodeDouble(d);
}
// Advances the parser over a String symbol, then forwards `s` to the wrapped
// encoder.
template<typename P>
void ValidatingEncoder<P>::encodeString(const std::string &s) {
parser_.advance(Symbol::Kind::String);
base_->encodeString(s);
}
// Advances the parser over a Bytes symbol, then forwards the buffer
// (`bytes`, `len`) to the wrapped encoder.
template<typename P>
void ValidatingEncoder<P>::encodeBytes(const uint8_t *bytes, size_t len) {
parser_.advance(Symbol::Kind::Bytes);
base_->encodeBytes(bytes, len);
}
// Advances the parser over a Fixed symbol and hands `len` to
// parser_.assertSize() before forwarding the buffer to the wrapped encoder.
// NOTE(review): assertSize() presumably compares `len` with the size
// declared in the schema — confirm in the parser type P.
template<typename P>
void ValidatingEncoder<P>::encodeFixed(const uint8_t *bytes, size_t len) {
parser_.advance(Symbol::Kind::Fixed);
parser_.assertSize(len);
base_->encodeFixed(bytes, len);
}
// Advances the parser over an Enum symbol and hands the ordinal `e` to
// parser_.assertLessThanSize() before forwarding it to the wrapped encoder,
// so the check happens before anything is written.
// NOTE(review): assertLessThanSize() presumably rejects ordinals outside the
// enum's symbol list — confirm in the parser type P.
template<typename P>
void ValidatingEncoder<P>::encodeEnum(size_t e) {
parser_.advance(Symbol::Kind::Enum);
parser_.assertLessThanSize(e);
base_->encodeEnum(e);
}
// Advances the parser over ArrayStart and pushes an initial repeat count of
// 0, then forwards arrayStart() to the wrapped encoder. Item counts are
// supplied later through setItemCount().
template<typename P>
void ValidatingEncoder<P>::arrayStart() {
parser_.advance(Symbol::Kind::ArrayStart);
parser_.pushRepeatCount(0);
base_->arrayStart();
}
// Pops the repeater from the parser and advances over ArrayEnd, then
// forwards arrayEnd() to the wrapped encoder.
template<typename P>
void ValidatingEncoder<P>::arrayEnd() {
parser_.popRepeater();
parser_.advance(Symbol::Kind::ArrayEnd);
base_->arrayEnd();
}
// Advances the parser over MapStart and pushes an initial repeat count of 0,
// then forwards mapStart() to the wrapped encoder. Entry counts are supplied
// later through setItemCount().
template<typename P>
void ValidatingEncoder<P>::mapStart() {
parser_.advance(Symbol::Kind::MapStart);
parser_.pushRepeatCount(0);
base_->mapStart();
}
// Pops the repeater from the parser and advances over MapEnd, then forwards
// mapEnd() to the wrapped encoder.
template<typename P>
void ValidatingEncoder<P>::mapEnd() {
parser_.popRepeater();
parser_.advance(Symbol::Kind::MapEnd);
base_->mapEnd();
}
// Passes `count` to parser_.nextRepeatCount() and then forwards it to the
// wrapped encoder.
template<typename P>
void ValidatingEncoder<P>::setItemCount(size_t count) {
parser_.nextRepeatCount(count);
base_->setItemCount(count);
}
// Marks the start of an array item or map entry.
//
// Throws Exception unless the symbol on top of the parser is a Repeater;
// otherwise the call is forwarded to the wrapped encoder. The parser itself
// is only inspected, not advanced.
template<typename P>
void ValidatingEncoder<P>::startItem() {
    const bool offBoundary = parser_.top() != Symbol::Kind::Repeater;
    if (offBoundary) {
        throw Exception("startItem at not an item boundary");
    }
    base_->startItem();
}
// Advances the parser over a Union symbol and selects branch `e` on it
// before forwarding the index to the wrapped encoder, so later calls are
// checked against the chosen branch.
template<typename P>
void ValidatingEncoder<P>::encodeUnionIndex(size_t e) {
parser_.advance(Symbol::Kind::Union);
parser_.selectBranch(e);
base_->encodeUnionIndex(e);
}
// Returns the wrapped encoder's byteCount(); no grammar state is involved.
template<typename P>
int64_t ValidatingEncoder<P>::byteCount() const {
return base_->byteCount();
}
} // namespace parsing
// Creates a decoder that forwards to `base` while tracking the grammar
// generated for schema `s`. The parser used is SimpleParser with the
// do-nothing DummyHandler.
DecoderPtr validatingDecoder(const ValidSchema &s,
                             const DecoderPtr &base) {
    using Parser = parsing::SimpleParser<parsing::DummyHandler>;
    using Impl = parsing::ValidatingDecoder<Parser>;
    return make_shared<Impl>(s, base);
}
// Creates an encoder that forwards to `base` while tracking the grammar
// generated for `schema`. The parser used is SimpleParser with the
// do-nothing DummyHandler.
EncoderPtr validatingEncoder(const ValidSchema &schema, const EncoderPtr &base) {
    using Parser = parsing::SimpleParser<parsing::DummyHandler>;
    using Impl = parsing::ValidatingEncoder<Parser>;
    return make_shared<Impl>(schema, base);
}
} // namespace avro

View File

@ -1,46 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_parsing_ValidatingCodec_hh__
#define avro_parsing_ValidatingCodec_hh__
#include <map>
#include <vector>
#include "NodeImpl.hh"
#include "Symbol.hh"
#include "ValidSchema.hh"
namespace avro {
namespace parsing {
/**
 * Produces the parsing grammar for a schema.
 *
 * doGenerate() is virtual so that subclasses can customise how individual
 * nodes are turned into productions.
 */
class ValidatingGrammarGenerator {
protected:
    /**
     * Builds the production for node `n`.
     *
     * @param n schema node to translate.
     * @param m map from nodes to productions, shared across the recursive
     *          calls for one schema; it is both read and updated.
     */
    virtual ProductionPtr doGenerate(const NodePtr &n,
                                     std::map<NodePtr, ProductionPtr> &m);

    /** Builds the production for the schema rooted at `schema`. */
    ProductionPtr generate(const NodePtr &schema);

public:
    // The class has a virtual member and is meant to be derived from, so it
    // needs a virtual destructor: without one, destroying a derived generator
    // through a base pointer is undefined behaviour.
    virtual ~ValidatingGrammarGenerator() = default;

    /** Returns the root grammar symbol for a validated schema. */
    Symbol generate(const ValidSchema &schema);
};
} // namespace parsing
} // namespace avro
#endif

View File

@ -1 +0,0 @@
{ "type" : "array", "items" : "int" , "name":"test" }

View File

@ -1,113 +0,0 @@
{
"type": "record",
"doc": "Top level Doc.",
"name": "RootRecord",
"fields": [
{
"name": "mylong",
"doc": "mylong field doc.",
"type": "long"
},
{
"name": "nestedrecord",
"type": {
"type": "record",
"name": "Nested",
"fields": [
{
"name": "inval1",
"type": "double"
},
{
"name": "inval2",
"type": "string"
},
{
"name": "inval3",
"type": "int"
}
]
}
},
{
"name": "mymap",
"type": {
"type": "map",
"values": "int"
}
},
{
"name": "recordmap",
"type": {
"type": "map",
"values": "Nested"
}
},
{
"name": "myarray",
"type": {
"type": "array",
"items": "double"
}
},
{
"name": "myenum",
"type": {
"type": "enum",
"name": "ExampleEnum",
"symbols": [
"zero",
"one",
"two",
"three"
]
}
},
{
"name": "myunion",
"type": [
"null",
{
"type": "map",
"values": "int"
},
"float"
]
},
{
"name": "anotherunion",
"type": [
"bytes",
"null"
]
},
{
"name": "mybool",
"type": "boolean"
},
{
"name": "anothernested",
"type": "Nested"
},
{
"name": "myfixed",
"type": {
"type": "fixed",
"size": 16,
"name": "md5"
}
},
{
"name": "anotherint",
"type": "int"
},
{
"name": "bytes",
"type": "bytes"
},
{
"name": "null",
"type": "null"
}
]
}

Some files were not shown because too many files have changed in this diff Show More