reduce files to smaller and delete no use files and function

2021-07-07 09:52:18 +08:00 · 2021-07-07 09:52:18 +08:00 · f9bf4244ce
parent 7c1ae052ef
commit f9bf4244ce
38 changed files with 33 additions and 9139 deletions
--- a/deps/SZ/sz/Makefile.am
+++ b/deps/SZ/sz/Makefile.am
@ -1,93 +0,0 @@
-#AM_CFLAGS = -I./include -I../zlib
-#LDFLAGS=-fPIC -shared
-
-AUTOMAKE_OPTIONS=foreign
-if FORTRAN
-include_HEADERS=include/MultiLevelCacheTable.h include/MultiLevelCacheTableWideInterval.h include/CacheTable.h include/defines.h\
-		include/CompressElement.h include/DynamicDoubleArray.h include/rw.h include/conf.h include/dataCompression.h\
-		include/dictionary.h include/DynamicFloatArray.h include/VarSet.h include/sz.h include/Huffman.h include/ByteToolkit.h include/szf.h\
-		include/sz_float.h include/sz_double.h include/callZlib.h include/iniparser.h include/TypeManager.h\
-		include/sz_int8.h include/sz_int16.h include/sz_int32.h include/sz_int64.h include/szd_int8.h include/szd_int16.h include/szd_int32.h include/szd_int64.h\
-		include/sz_uint8.h include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h include/szd_uint64.h\
-		include/sz_float_pwr.h include/sz_double_pwr.h include/szd_float.h include/szd_double.h include/szd_float_pwr.h include/szd_double_pwr.h\
-		include/sz_float_ts.h include/szd_float_ts.h include/sz_double_ts.h include/szd_double_ts.h include/utility.h include/sz_opencl.h\
-		include/DynamicByteArray.h include/DynamicIntArray.h include/TightDataPointStorageI.h include/TightDataPointStorageD.h include/TightDataPointStorageF.h\
-		include/pastriD.h include/pastriF.h include/pastriGeneral.h include/pastri.h include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h include/sz_stats.h sz.mod rw.mod
-lib_LTLIBRARIES=libSZ.la
-libSZ_la_CFLAGS=-I./include -I../zlib/ -I../zstd/
-if TIMECMPR
-libSZ_la_CFLAGS+=-DHAVE_TIMECMPR
-endif
-if RANDOMACCESS
-libSZ_la_CFLAGS+=-DHAVE_RANDOMACCESS
-endif
-if OPENMP
-libSZ_la_CFLAGS+=-fopenmp
-endif
-libSZ_la_LDFLAGS = -version-info  2:1:0
-libSZ_la_LIDADD=../zlib/.libs/libzlib.a ../zstd/.libs/libzstd.a
-libSZ_la_SOURCES=src/MultiLevelCacheTable.c src/MultiLevelCacheTableWideInterval.c \
-		src/ByteToolkit.c src/dataCompression.c src/DynamicIntArray.c src/iniparser.c src/szf.c \
-		src/CompressElement.c src/DynamicByteArray.c src/rw.c src/utility.c\
-		src/TightDataPointStorageI.c src/TightDataPointStorageD.c src/TightDataPointStorageF.c \
-		src/conf.c src/DynamicDoubleArray.c src/rwf.c src/TypeManager.c \
-		src/dictionary.c src/DynamicFloatArray.c src/VarSet.c src/callZlib.c src/Huffman.c \
-		src/sz_float.c src/sz_double.c src/sz_int8.c src/sz_int16.c src/sz_int32.c src/sz_int64.c\
-		src/sz_uint8.c src/sz_uint16.c src/sz_uint32.c src/sz_uint64.c src/szd_uint8.c src/szd_uint16.c src/szd_uint32.c src/szd_uint64.c\
-		src/szd_float.c src/szd_double.c src/szd_int8.c src/szd_int16.c src/szd_int32.c src/szd_int64.c src/sz.c\
-		src/sz_float_pwr.c src/sz_double_pwr.c src/szd_float_pwr.c src/szd_double_pwr.c src/ArithmeticCoding.c src/CacheTable.c\
-		src/sz_interface.F90 src/rw_interface.F90 src/exafelSZ.c
-libSZ_la_LINK=$(AM_V_CC)$(LIBTOOL) --tag=FC --mode=link $(FCLD) $(libSZ_la_CFLAGS) -O3 $(libSZ_la_LDFLAGS) -o $(lib_LTLIBRARIES)
-else
-include_HEADERS=include/MultiLevelCacheTable.h include/MultiLevelCacheTableWideInterval.h include/CacheTable.h include/defines.h\
-		include/CompressElement.h include/DynamicDoubleArray.h include/rw.h include/conf.h include/dataCompression.h\
-		include/dictionary.h include/DynamicFloatArray.h include/VarSet.h include/sz.h include/Huffman.h include/ByteToolkit.h\
-		include/sz_float.h include/sz_double.h include/callZlib.h include/iniparser.h include/TypeManager.h\
-		include/sz_int8.h include/sz_int16.h include/sz_int32.h include/sz_int64.h include/szd_int8.h include/szd_int16.h include/szd_int32.h include/szd_int64.h\
-		include/sz_uint8.h include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h include/szd_uint64.h\
-		include/sz_float_pwr.h include/sz_double_pwr.h include/szd_float.h include/szd_double.h include/szd_float_pwr.h include/szd_double_pwr.h\
-		include/sz_float_ts.h include/szd_float_ts.h include/sz_double_ts.h include/szd_double_ts.h include/utility.h include/sz_opencl.h\
-		include/DynamicByteArray.h include/DynamicIntArray.h include/TightDataPointStorageI.h include/TightDataPointStorageD.h include/TightDataPointStorageF.h\
-		include/pastriD.h include/pastriF.h include/pastriGeneral.h include/pastri.h include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h include/sz_stats.h
-
-lib_LTLIBRARIES=libSZ.la
-libSZ_la_CFLAGS=-I./include -I../zlib -I../zstd/ 
-if WRITESTATS
-libSZ_la_CFLAGS+=-DHAVE_WRITESTATS
-endif
-if TIMECMPR
-libSZ_la_CFLAGS+=-DHAVE_TIMECMPR
-endif
-if RANDOMACCESS
-libSZ_la_CFLAGS+=-DHAVE_RANDOMACCESS
-endif
-if OPENMP
-libSZ_la_CFLAGS+=-fopenmp
-endif
-libSZ_la_LDFLAGS = -version-info  1:4:0
-libSZ_la_LIDADD=../zlib/.libs/libzlib.a ../zlib/.libs/libzstd.a
-libSZ_la_SOURCES=src/MultiLevelCacheTable.c src/MultiLevelCacheTableWideInterval.c \
-		src/ByteToolkit.c src/dataCompression.c src/DynamicIntArray.c src/iniparser.c\
-		src/CompressElement.c src/DynamicByteArray.c src/rw.c src/utility.c\
-		src/TightDataPointStorageI.c src/TightDataPointStorageD.c src/TightDataPointStorageF.c \
-		src/conf.c src/DynamicDoubleArray.c src/TypeManager.c \
-		src/dictionary.c src/DynamicFloatArray.c src/VarSet.c src/callZlib.c src/Huffman.c \
-		src/sz_float.c src/sz_double.c src/sz_int8.c src/sz_int16.c src/sz_int32.c src/sz_int64.c\
-		src/sz_uint8.c src/sz_uint16.c src/sz_uint32.c src/sz_uint64.c src/szd_uint8.c src/szd_uint16.c src/szd_uint32.c src/szd_uint64.c\
-		src/szd_float.c src/szd_double.c src/szd_int8.c src/szd_int16.c src/szd_int32.c src/szd_int64.c src/sz.c\
-		src/sz_float_pwr.c src/sz_double_pwr.c src/szd_float_pwr.c src/szd_double_pwr.c src/ArithmeticCoding.c src/exafelSZ.c src/CacheTable.c
-if PASTRI
-libSZ_la_SOURCES+=src/pastri.c
-endif
-if OPENMP
-libSZ_la_SOURCES+=src/sz_omp.c
-endif
-if TIMECMPR
-libSZ_la_SOURCES+=src/sz_float_ts.c src/szd_float_ts.c src/sz_double_ts.c src/szd_double_ts.c
-endif
-if WRITESTATS
-libSZ_la_SOURCES+=src/sz_stats.c
-endif
-
-libSZ_la_LINK= $(AM_V_CC)$(LIBTOOL) --tag=CC --mode=link $(CCLD) $(libSZ_la_CFLAGS) -O3 $(libSZ_la_LDFLAGS) -o $(lib_LTLIBRARIES)
-endif
--- a/deps/SZ/sz/Makefile.in
+++ b/deps/SZ/sz/Makefile.in
--- a/deps/SZ/sz/include/ArithmeticCoding.h
+++ b/deps/SZ/sz/include/ArithmeticCoding.h
@ -1,62 +0,0 @@
-/**
- *  @file ArithmeticCoding.h
- *  @author Sheng Di
- *  @date Dec, 2018
- *  @brief Header file for the ArithmeticCoding.c.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef _ArithmeticCoding_H
-#define _ArithmeticCoding_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-
-#define ONE_FOURTH (0x40000000000) //44 bits are absolutely enough to deal with a large dataset (support at most 16TB per process)
-#define ONE_HALF (0x80000000000)
-#define THREE_FOURTHS (0xC0000000000)
-#define MAX_CODE (0xFFFFFFFFFFF)
-#define MAX_INTERVALS 1048576 //the limit to the arithmetic coding (at most 2^(20) intervals)
-
-typedef struct Prob {
-    size_t low;
-    size_t high;
-    int state;
-} Prob;
-
-typedef struct AriCoder
-{
-	int numOfRealStates; //the # real states menas the number of states after the optimization of # intervals
-	int numOfValidStates; //the # valid states means the number of non-zero frequency cells (some states/codes actually didn't appear)
-	size_t total_frequency;	
-	Prob* cumulative_frequency; //used to encode data more efficiencly
-} AriCoder;
-
-void output_bit_1(unsigned int* buf);
-void output_bit_0(unsigned int* buf);
-unsigned int output_bit_1_plus_pending(int pending_bits);
-unsigned int output_bit_0_plus_pending(int pending_bits);
-
-AriCoder *createAriCoder(int numOfStates, int *s, size_t length);
-void freeAriCoder(AriCoder *ariCoder);
-void ari_init(AriCoder *ariCoder, int *s, size_t length);
-unsigned int pad_ariCoder(AriCoder* ariCoder, unsigned char** out);
-int unpad_ariCoder(AriCoder** ariCoder, unsigned char* bytes);
-
-unsigned char get_bit(unsigned char* p, int offset);
-
-void ari_encode(AriCoder *ariCoder, int *s, size_t length, unsigned char *out, size_t *outSize);
-void ari_decode(AriCoder *ariCoder, unsigned char *s, size_t s_len, size_t targetLength, int *out);
-
-Prob* getCode(AriCoder *ariCoder, size_t scaled_value);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ----- #ifndef _ArithmeticCoding_H  ----- */
-
--- a/deps/SZ/sz/include/CacheTable.h
+++ b/deps/SZ/sz/include/CacheTable.h
@ -1,40 +0,0 @@
-/**
- *  @file CacheTable.h
- *  @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
- *  @date Jan, 2019
- *  @brief Header file.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef SZ_MASTER_CACHETABLE_H
-#define SZ_MASTER_CACHETABLE_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "stdio.h"
-#include "stdint.h"
-#include <math.h>
-
-extern double* g_CacheTable;
-extern uint32_t * g_InverseTable;
-extern uint32_t baseIndex;
-extern uint32_t topIndex;
-extern int bits;
-
-int doubleGetExpo(double d);
-int CacheTableGetRequiredBits(double precision, int quantization_intervals);
-uint32_t CacheTableGetIndex(float value, int bits);
-uint64_t CacheTableGetIndexDouble(double value, int bits);
-int CacheTableIsInBoundary(uint32_t index);
-void CacheTableBuild(double * table, int count, double smallest, double largest, double precision, int quantization_intervals);
-uint32_t CacheTableFind(uint32_t index);
-void CacheTableFree();
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //SZ_MASTER_CACHETABLE_H
--- a/deps/SZ/sz/include/DynamicDoubleArray.h
+++ b/deps/SZ/sz/include/DynamicDoubleArray.h
@ -1,36 +0,0 @@
-/**
- *  @file DynamicDoubleArray.h
- *  @author Sheng Di
- *  @date April, 2016
- *  @brief Header file for Dynamic Double Array.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef _DynamicDoubleArray_H
-#define _DynamicDoubleArray_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-
-typedef struct DynamicDoubleArray
-{	
-	double* array;
-	size_t size;
-	double capacity;
-} DynamicDoubleArray;
-
-void new_DDA(DynamicDoubleArray **dda, size_t cap);
-void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data);
-void free_DDA(DynamicDoubleArray *dda);
-double getDDA_Data(DynamicDoubleArray *dda, size_t pos);
-void addDDA_Data(DynamicDoubleArray *dda, double value);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ----- #ifndef _DynamicDoubleArray_H  ----- */
--- a/deps/SZ/sz/include/DynamicFloatArray.h
+++ b/deps/SZ/sz/include/DynamicFloatArray.h
@ -1,35 +0,0 @@
-/**
- *  @file DynamicFloatArray.h
- *  @author Sheng Di
- *  @date April, 2016
- *  @brief Header file for Dynamic Float Array.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef _DynamicFloatArray_H
-#define _DynamicFloatArray_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-typedef struct DynamicFloatArray
-{	
-	float* array;
-	size_t size;
-	size_t capacity;
-} DynamicFloatArray;
-
-void new_DFA(DynamicFloatArray **dfa, size_t cap);
-void convertDFAtoFloats(DynamicFloatArray *dfa, float **data);
-void free_DFA(DynamicFloatArray *dfa);
-float getDFA_Data(DynamicFloatArray *dfa, size_t pos);
-void addDFA_Data(DynamicFloatArray *dfa, float value);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ----- #ifndef _DynamicFloatArray_H  ----- */
--- a/deps/SZ/sz/include/MultiLevelCacheTable.h
+++ b/deps/SZ/sz/include/MultiLevelCacheTable.h
@ -1,50 +0,0 @@
-/**
- *  @file MultiLevelCacheTable.h
- *  @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
- *  @date Jan, 2019
- *  @brief Header file.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef _MULTILEVELCACHETABLE_H
-#define _MULTILEVELCACHETABLE_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdint.h>
-#include <memory.h>
-#include <stdlib.h>
-#include "stdio.h"
-
-typedef struct SubLevelTable{
-    uint32_t baseIndex;
-    uint32_t topIndex;
-    uint32_t* table;
-    uint8_t expoIndex;
-} SubLevelTable;
-
-typedef struct TopLevelTable{
-    uint8_t bits;
-    uint8_t baseIndex;
-    uint8_t topIndex;
-    struct SubLevelTable* subTables;
-    float bottomBoundary;
-    float topBoundary;
-} TopLevelTable;
-
-uint8_t MLCT_GetExpoIndex(float value);
-uint8_t MLCT_GetRequiredBits(float precision);
-uint32_t MLCT_GetMantiIndex(float value, int bits);
-float MLTC_RebuildFloat(uint8_t expo, uint32_t manti, int bits);
-void MultiLevelCacheTableBuild(struct TopLevelTable* topTable, float* precisionTable, int count, float precision);
-uint32_t MultiLevelCacheTableGetIndex(float value, struct TopLevelTable* topLevelTable);
-void MultiLevelCacheTableFree(struct TopLevelTable* table);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //_MULTILEVELCACHETABLE_H
--- a/deps/SZ/sz/include/MultiLevelCacheTableWideInterval.h
+++ b/deps/SZ/sz/include/MultiLevelCacheTableWideInterval.h
@ -1,54 +0,0 @@
-/**
- *  @file MultiLevelCacheTableWideInterval.h
- *  @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
- *  @date Jan, 2019
- *  @brief Header file for MultiLevelCacheTableWideInterval.c.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-
-#ifndef _MULTILEVELCACHETABLEWIDEINTERVAL_H
-#define _MULTILEVELCACHETABLEWIDEINTERVAL_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdint.h>
-#include <memory.h>
-#include <stdlib.h>
-#include "stdio.h"
-
-typedef struct SubLevelTableWideInterval{
-    uint64_t baseIndex;
-    uint64_t topIndex;
-    uint16_t* table;
-    uint16_t expoIndex;
-} SubLevelTableWideInterval;
-
-typedef struct TopLevelTableWideInterval{
-    uint16_t bits;
-    uint16_t baseIndex;
-    uint16_t topIndex;
-    struct SubLevelTableWideInterval* subTables;
-    double bottomBoundary;
-    double topBoundary;
-} TopLevelTableWideInterval;
-
-void freeTopLevelTableWideInterval(struct TopLevelTableWideInterval* topTable);
-
-uint16_t MLCTWI_GetExpoIndex(double value);
-uint16_t MLCTWI_GetRequiredBits(double precision);
-uint64_t MLCTWI_GetMantiIndex(double value, int bits);
-
-double MLTCWI_RebuildDouble(uint16_t expo, uint64_t manti, int bits);
-void MultiLevelCacheTableWideIntervalBuild(struct TopLevelTableWideInterval* topTable, double* precisionTable, int count, double precision, int plus_bits);
-uint32_t MultiLevelCacheTableWideIntervalGetIndex(double value, struct TopLevelTableWideInterval* topLevelTable);
-void MultiLevelCacheTableWideIntervalFree(struct TopLevelTableWideInterval* table);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //_MULTILEVELCACHETABLEWIDEINTERVAL_H
--- a/deps/SZ/sz/include/TypeManager.h
+++ b/deps/SZ/sz/include/TypeManager.h
@ -18,19 +18,12 @@ extern "C" {
 #include <stdint.h>

 //TypeManager.c
-size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArrayLength, unsigned char **result);
-size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result);
-void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
-size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);
-size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result);
 void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
-size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);
-void convertByteArray2IntArray_fast_3b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
+size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);
+
+
 int getLeftMovingSteps(size_t k, unsigned char resiBitLength);
 size_t convertIntArray2ByteArray_fast_dynamic(unsigned char* timeStepType, unsigned char resiBitLength, size_t nbEle, unsigned char **bytes);
-size_t convertIntArray2ByteArray_fast_dynamic2(unsigned char* timeStepType, unsigned char* resiBitLength, size_t resiBitLengthLength, unsigned char **bytes);
-int computeBitNumRequired(size_t dataLength);
-void decompressBitArraybySimpleLZ77(int** result, unsigned char* bytes, size_t bytesLength, size_t totalLength, int validLength);

 #ifdef __cplusplus
 }
--- a/deps/SZ/sz/include/VarSet.h
+++ b/deps/SZ/sz/include/VarSet.h
@ -1,84 +0,0 @@
-/**
- *  @file VarSet.h
- *  @author Sheng Di
- *  @date July, 2016
- *  @brief Header file for the Variable.c.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef _VarSet_H
-#define _VarSet_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-
-typedef struct sz_multisteps
-{
-	char compressionType;
-	int predictionMode;
-	int lastSnapshotStep; //the previous snapshot step
-	unsigned int currentStep; //current time step of the execution/simulation
-	
-	//void* ori_data; //original data pointer, which serve as the key for retrieving hist_data
-	void* hist_data; //historical data in past time steps
-} sz_multisteps;
-
-typedef struct SZ_Variable
-{
-	unsigned char var_id;
-	char* varName;
-	char compressType; //102 means HZ; 101 means SZ 
-	int dataType; //SZ_FLOAT or SZ_DOUBLE
-	size_t r5;
-	size_t r4;
-	size_t r3;
-	size_t r2;
-	size_t r1;
-	int errBoundMode;
-	double absErrBound;
-	double relBoundRatio;
-	double pwRelBoundRatio;
-	void* data;
-	sz_multisteps *multisteps;
-	unsigned char* compressedBytes;
-	size_t compressedSize;
-	struct SZ_Variable* next;
-} SZ_Variable;
-
-typedef struct SZ_VarSet
-{
-	unsigned short count;
-	struct SZ_Variable *header;
-	struct SZ_Variable *lastVar;
-} SZ_VarSet;
-
-void free_Variable_keepOriginalData(SZ_Variable* v);
-void free_Variable_keepCompressedBytes(SZ_Variable* v);
-void free_Variable_all(SZ_Variable* v);
-void SZ_batchAddVar(int var_id, char* varName, int dataType, void* data, 
-			int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio,
-			size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
-int SZ_batchDelVar_vset(SZ_VarSet* vset, char* varName);
-int SZ_batchDelVar(char* varName);
-int SZ_batchDelVar_ID_vset(SZ_VarSet* vset, int var_id);
-int SZ_batchDelVar_ID(int var_id);
-
-SZ_Variable* SZ_searchVar(char* varName);
-void* SZ_getVarData(char* varName, size_t *r5, size_t *r4, size_t *r3, size_t *r2, size_t *r1);
-
-void free_VarSet_vset(SZ_VarSet *vset, int mode);
-void SZ_freeVarSet(int mode);
-
-void free_multisteps(sz_multisteps* multisteps);
-int checkVarID(unsigned char cur_var_id, unsigned char* var_ids, int var_count);
-SZ_Variable* SZ_getVariable(int var_id);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ----- #ifndef _VarSet_H  ----- */
--- a/deps/SZ/sz/include/conf.h
+++ b/deps/SZ/sz/include/conf.h
@ -20,11 +20,8 @@ extern "C" {
 void updateQuantizationInfo(int quant_intervals);
 int SZ_ReadConf(const char* sz_cfgFile);
 int SZ_LoadConf(const char* sz_cfgFile);
-int checkVersion(unsigned char version);
-int computeVersion(int major, int minor, int revision);
-int checkVersion2(char* version);

-void initSZ_TSC();
+
 unsigned int roundUpToPowerOf2(unsigned int base);
 double computeABSErrBoundFromPSNR(double psnr, double threshold, double value_range);
 double computeABSErrBoundFromNORM_ERR(double normErr, size_t nbEle);
--- a/deps/SZ/sz/include/pastri.h
+++ b/deps/SZ/sz/include/pastri.h
@ -1,140 +0,0 @@
-//CHECK:
-//What happens when ECQBits==1, or ECQBits==0 or ECQBits<0?
-//Rounding? Scale originalEb by 0.99?
-
-//Possible improvement: Change GAMESS format: {i i i i d} -> {i}{i}{i}{i}{d}
-//Possible improvement: Optimize bookkeeping bits
-//Possible improvement: Guess the type (C/UC, Sparse/Not)
-//Possible improvement: Get rid of writing/reading some of the indexes to in/out buffers
-//Possible improvement: Get rid of all debug stuff, including Makefile debug flags
-//Possible improvement: Get rid of "compressedBytes"
-//Possible improvement: SparseCompressed, ECQBits=2: 1's and -1's can be represented by just 0 and 1, instead 10 and 11. 
-//Possible improvement: SparseCompressed, ECQBits>2: Again: 1: 10, -1:11, Others: 0XX...XX 
-//Possible improvement: WriteBitsFast: maybe remove some masks?
-//Possible improvement: WriteBitsFast: Get rid of multiple calls!
-//Possible improvement: UCSparse: Indexes use 64 bits. It can be lowered to _1DIdxBits
-//Possible improvement: Parameters: Smaller data sizes may be possible!
-
-
-
-#ifndef PASTRI_H
-#define PASTRI_H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-#include <assert.h> //Just for debugging purposes!
-
-//#define DATASIZE 8 //Bytes per input data point.
-//We have only 1 double per data point, so it is 8 bytes.
-
-#define MAX_PS_SIZE 100
-#define MAX_BLOCK_SIZE 10000
-#define MAX_BUFSIZE 160000  //Should be a multiple of 8
-#define D_W 0 //Debug switch: Write (input block)
-#define D_R 0 //Debug switch: Read (compressed block)
-#define D_G 0 //Debug switch: General
-#define D_G2 0 //Debug switch: General 2 (a little more detail)
-#define D_C 0 //Debug switch: C
-//#define DEBUG 1 //Debug switch
-
-//#define BOOKKEEPINGBITS 0 //Currently unused
-//#define BOOKKEEPINGBITS 120 //Includes: mode, indexOffsets, compressedBytes, Pb_, ECQBits_ (8+64+32+8+8) 
-//BOOKKEEPINGBITS is defined here, because if P & S is going to be used, they appear just after the bookkeeping part.
-//This allows us to write P and S directly onto using outBuf.
-  
-
-// IMPORTANT NOTE:
-//Read/Write up to 56 bits.
-//More than that is not supported!
-
-
-/********************************************************************/
-//Datatype Declarations:
-/********************************************************************/
-typedef struct pastri_params{
-  double originalEb; //Error Bound entered by the user
-  double usedEb; //Error Bound used during compression/deceompression
-  
-  int numBlocks; //Number of blocks to be compressed
-  int dataSize; //8(=Double) or 4(=Float)
-  
-  int bf[4]; //Orbital types (basis function types). Typically in range [0,3]
-  int idxRange[4];  //Ranges of indexes. idxRange[i]=(bf[i]+1)*(bf[i]+2)/2;
-  
-  int sbSize; //=idxRange[2]*idxRange[3];
-  int sbNum;  //=idxRange[0]*idxRange[1];
-  int bSize; //=sbSize*sbNum;
-  
-  //uint16_t idxOffset[4]; //Index offset values. No longer used.
-  
-}pastri_params;
-
-//Block-specific stuff:
-typedef struct pastri_blockParams{
-  uint16_t nonZeros;
-  //int ECQ0s; //= p->bSize - numOutliers //OR: p->bSize=ECQ0s+ECQ1s+ECQOthers
-  int ECQ1s;
-  int ECQOthers;
-  int numOutliers; //=ECQ1s+ECQOthers
-  int patternBits;
-  int scaleBits;
-  double binSize;
-  double scalesBinSize;
-  uint64_t ECQExt;
-  int ECQBits;
-  int _1DIdxBits;
-}pastri_blockParams;
-
-typedef union u_UI64I64D{
-  uint64_t ui64;
-  int64_t i64;
-  double d;
-} u_UI64I64D;
-
-/********************************************************************/
-//Function Prototypes:
-/********************************************************************/
-void SZ_pastriReadParameters(char paramsFilename[512],pastri_params *paramsPtr);
-//Read the basic PaSTRI parameters from a file, speficied by paramsFilename.
-
-void SZ_pastriPreprocessParameters(pastri_params *p);
-//Using basic PaSTRI parameters, generate the others.
-//For example, block and sub-block sizes are generated by using basis function types.
-
-void SZ_pastriCompressBatch(pastri_params *p,unsigned char *originalBuf, unsigned char** compressedBufP,size_t *compressedBytes);
-//INPUTS: p, originalBuf
-//OUTPUTS: compressedBufP, compressedBytes
-//Using the inputs, compressedBufP is allocated and populated by the compressed data. Compressed size is written into compressedBytes.
-//Parameters are also stored at the beginning part of the compressedBuf
-
-void SZ_pastriDecompressBatch(unsigned char*compressedBuf, pastri_params *p, unsigned char** decompressedBufP ,size_t *decompressedBytes);
-//INPUTS: compressedBuf
-//OUTPUTS: p, decompressedBufP, decompressedBytes
-//First, parameters are read from compressedBuf and written into p.
-//Then, decompressedBufP is allocated and populated by the decompressed data. Decompressed size is written into decompressedBytes.
-
-void SZ_pastriCheckBatch(pastri_params *p,unsigned char*originalBuf,unsigned char*decompressedBuf); 
-//INPUTS: p, originalBuf, decompressedBuf
-//OUTPUTS: None (Just some on-screen messages)
-//Compares originalBuf with decompressedBuf. Checks whether the absolute error condition is satisfied or not.
-
-/********************************************************************/
-//Other Includes:
-/********************************************************************/
-
-
-
-#include "pastriGeneral.h"  //General tools
-#include "pastriD.h"  //Compression/Decompression for Double data
-#include "pastriF.h"  //Compression/Decompression for Float data
-
-
-#endif
-
-
-
-
-
--- a/deps/SZ/sz/include/pastriD.h
+++ b/deps/SZ/sz/include/pastriD.h
@ -1,911 +0,0 @@
-#ifndef PASTRID_H
-#define PASTRID_H
-
-static inline int64_t pastri_double_quantize(double x, double binSize){
-  //Add or sub 0.5, depending on the sign:
-  x=x/binSize;
-  
-  u_UI64I64D u1,half;
-  u1.d=x;
-  
-  half.d=0.5;
-  
-//  //printf("pastri_double_quantize:\nx=%lf  x=0x%lx\n",x,(*((uint64_t *)(&x))));
-//  //printf("sign(x):0x%lx\n", x);
-//  //printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));
-  half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000);
-//  //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));
-  return (int64_t)(x + half.d);
-}
-
-static inline void pastri_double_PatternMatch(double*data,pastri_params* p,pastri_blockParams* bp,int64_t* patternQ,int64_t *scalesQ, int64_t* ECQ){
-  //Find the pattern.
-  //First, find the extremum point:
-  double absExt=0; //Absolute value of Extremum
-  int extIdx=-1; //Index of Extremum
-  bp->nonZeros=0;
-  int i,sb;
-  for(i=0;i<p->bSize;i++){
-//    //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG
-    if(abs_FastD(data[i])>p->usedEb){
-      bp->nonZeros++;
-      ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG
-    }
-    if(abs_FastD(data[i])>absExt){
-      absExt=abs_FastD(data[i]);
-      extIdx=i;
-    }
-  }
-  int patternIdx; //Starting Index of Pattern
-  patternIdx=(extIdx/p->sbSize)*p->sbSize;
-  
-  double patternExt=data[extIdx];
-  bp->binSize=2*p->usedEb;
-  
-  ////if(DEBUG){printf("Extremum  : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG
-  ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG
-  
-  ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize)  );}   }//DEBUG
-  
-  //int64_t *patternQ=(int64_t*)(outBuf+15);  //Possible Improvement!
-
-  
-  for(i=0;i<p->sbSize;i++){
-    patternQ[i]=pastri_double_quantize(data[patternIdx+i],bp->binSize);
-    //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}
-  }
-  
-  bp->patternBits=bitsNeeded_double((abs_FastD(patternExt)/bp->binSize)+1)+1;
-  bp->scaleBits=bp->patternBits;
-  bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->scaleBits-1))-1);
-  ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG
-  ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG
-  //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG
-  
-  //Calculate Scales.
-  //The index part of the input buffer will be reused to hold Scale, Pattern, etc. values.
-  int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum.
-  //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8);  //Possible Improvement!
-  int patternExtZero=(patternExt==0);
-  ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG
-  for(sb=0;sb<p->sbNum;sb++){
-    //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt;
-    //scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt;
-    //assert(scales[sb]<=1);
-    scalesQ[sb]=pastri_double_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize);
-    //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}
-  }
-  ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG
-
-  //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it.
-
-  //uint64_t wVal;
-  bp->ECQExt=0;
-  int _1DIdx;
-  bp->ECQ1s=0;
-  bp->ECQOthers=0;
-  double PS_binSize=bp->scalesBinSize*bp->binSize;
-  for(sb=0;sb<p->sbNum;sb++){
-    for(i=0;i<p->sbSize;i++){
-      _1DIdx=sb*p->sbSize+i;
-      ECQ[_1DIdx]=pastri_double_quantize( (scalesQ[sb]*patternQ[i]*PS_binSize-data[_1DIdx]),bp->binSize );
-      double absECQ=abs_FastD(ECQ[_1DIdx]);
-      if(absECQ > bp->ECQExt)
-        bp->ECQExt=absECQ;
-      ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG
-      switch (ECQ[_1DIdx]){
-        case 0:
-          //ECQ0s++; //Currently not needed
-          break;
-        case 1:
-          bp->ECQ1s++;
-          break;
-        case -1:
-          bp->ECQ1s++;
-          break;
-        default:
-          bp->ECQOthers++;
-          break;
-      }
-    }
-  }
-  
-  /*
-  //DEBUG: Self-check. Remove this later.
-  for(sb=0;sb<p->sbNum;sb++){
-    for(i=0;i<p->sbSize;i++){
-      _1DIdx=sb*p->sbSize+i;
-      double decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize;
-      if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){
-        //printf("p->usedEb=%.6e\n",p->usedEb);
-        //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));
-        assert(0);
-      }
-    }
-  }
-  */
-}
-
-static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ,pastri_params *p,pastri_blockParams* bp,unsigned char* outBuf,int *numOutBytes){
-  bp->ECQBits=bitsNeeded_UI64(bp->ECQExt)+1;
-  bp->_1DIdxBits=bitsNeeded_UI64(p->bSize);
-  //(*numOutBytes)=0;
-  
-  int i;
-  
-  //Encode: 3 options:
-  //Compressed, Sparse ECQ
-  //Compressed, Non-Sparse ECQ
-  //Uncompressed, Sparse Data
-  //Uncompressed, Non-spsarse Data
-  
-  unsigned int UCSparseBits;  //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, nonZeros, {indexes, data}
-  unsigned int UCNonSparseBits;  //Uncompressed, NonSparse bits. Includes: mode, data
-  unsigned int CSparseBits;  //Includes: mode, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
-  unsigned int CNonSparseBits;  //Includes: mode, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
-  //int BOOKKEEPINGBITS=120; //Includes: mode, compressedBytes, patternBits, ECQBits (8+64+32+8+8) //Moved to much earlier!
-    
-  //Consider: ECQ0s, ECQ1s, ECQOthers. Number of following values in ECQ: {0}, {1,-1}, { val<=-2, val>=2}
-  //ECQ0s is actually not needed, but others are needed.
-
-  UCSparseBits = p->dataSize*(1 + 2 + bp->nonZeros*16);  //64 bits for 4 indexes, 64 bit for data.
-  UCNonSparseBits = p->dataSize*(1 + p->bSize*8);
-  bp->numOutliers=bp->ECQ1s+bp->ECQOthers;
-  if(bp->ECQBits==2){
-    CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(1+bp->_1DIdxBits);
-    CNonSparseBits = p->dataSize*(1+4+1+1) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s ;  //Or: ECQ0s+ECQ1s*2;
-  }else{ //ECQBits>2
-    CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(2+bp->_1DIdxBits) + bp->ECQOthers*(1+bp->_1DIdxBits+bp->ECQBits);
-    //CNonSparseBits = 8+32+8+8+ patternBits*p->sbSize + scaleBits*p->sbNum + p->bSize + ECQ0s + ECQ1s*3 + ECQOthers*(2+ECQBits);
-    CNonSparseBits = p->dataSize*(1+4+1+1)+ bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s*2 + bp->ECQOthers*(1+bp->ECQBits);
-  }
-  
-  int UCSparseBytes=(UCSparseBits+7)/8; 
-  int UCNonSparseBytes=(UCNonSparseBits+7)/8; 
-  int CSparseBytes=(CSparseBits+7)/8; 
-  int CNonSparseBytes=(CNonSparseBits+7)/8; 
-  uint64_t bitPos=0;
-  uint64_t bytePos=0;
-  int i0,i1,i2,i3;
-  int _1DIdx;
-  
-  //*(uint16_t*)(&outBuf[1])=p->idxOffset[0];
-  //*(uint16_t*)(&outBuf[3])=p->idxOffset[1];
-  //*(uint16_t*)(&outBuf[5])=p->idxOffset[2];
-  //*(uint16_t*)(&outBuf[7])=p->idxOffset[3];
-    
-  //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG
-  //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG
-  
-  //****************************************************************************************
-  //if(0){ //DEBUG
-  //W:UCSparse
-  if((UCSparseBytes<UCNonSparseBytes) && (UCSparseBytes<CSparseBytes) && (UCSparseBytes<CNonSparseBytes) ){ 
-    //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data
-    *numOutBytes=UCSparseBytes;
-    //if(D_G){printf("UCSparse\n");} //DEBUG
-    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
-    outBuf[0]=0; //mode
-    
-    //*(uint16_t*)(&outBuf[9])=nonZeros;
-    //bytePos=11;//0:mode, 1-8:indexOffsets 9-10:NonZeros. So start from 11.
-    *(uint16_t*)(&outBuf[1])=bp->nonZeros;
-    bytePos=3;//0:mode, 2-3:NonZeros. So start from 3.
-    
-    for(i0=0;i0<p->idxRange[0];i0++)
-      for(i1=0;i1<p->idxRange[1];i1++)
-        for(i2=0;i2<p->idxRange[2];i2++)
-          for(i3=0;i3<p->idxRange[3];i3++){
-            _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
-            if(abs_FastD(data[_1DIdx])>p->usedEb){
-              //*(uint16_t*)(&outBuf[bytePos])=i0+1+p->idxOffset[0];
-              *(uint16_t*)(&outBuf[bytePos])=i0;
-              bytePos+=2;
-              //*(uint16_t*)(&outBuf[bytePos])=i1+1+p->idxOffset[1];
-              *(uint16_t*)(&outBuf[bytePos])=i1;
-              bytePos+=2;
-              //*(uint16_t*)(&outBuf[bytePos])=i2+1+p->idxOffset[2];
-              *(uint16_t*)(&outBuf[bytePos])=i2;
-              bytePos+=2;
-              //*(uint16_t*)(&outBuf[bytePos])=i3+1+p->idxOffset[3];
-              *(uint16_t*)(&outBuf[bytePos])=i3;
-              bytePos+=2;
-              
-              *(double*)(&outBuf[bytePos])=data[_1DIdx];
-              bytePos+=p->dataSize;
-            }
-          }
-    
-    //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG
-    
-  //****************************************************************************************
-  //}else if(0){ //DEBUG
-  //W:UCNonSparse
-  }else if((UCNonSparseBytes<UCSparseBytes) && (UCNonSparseBytes<CSparseBytes) && (UCNonSparseBytes<CNonSparseBytes) ){ 
-    //Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data
-    *numOutBytes=UCNonSparseBytes;
-    //if(D_G){printf("UCNonSparse\n");} //DEBUG
-    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
-    outBuf[0]=1; //mode
-    
-    //memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9);
-    memcpy(&outBuf[1], data, p->bSize*p->dataSize);
-    
-    //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG
-    /*
-    for(i=0;i<UCNonSparseBytes-17;i++){
-      //printf("%d ",inBuf[p->bSize*8+i]);
-    }
-    //printf("\n");
-    for(i=0;i<UCNonSparseBytes-17;i++){
-      //printf("%d ",outBuf[17+i]);
-    }
-    //printf("\n");
-    */
-  //****************************************************************************************
-  //}else if(1){ //DEBUG
-  //W:CSparse
-  }else if((CSparseBytes<UCNonSparseBytes) && (CSparseBytes<UCSparseBytes) && (CSparseBytes<CNonSparseBytes) ){ 
-    //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
-    *numOutBytes=CSparseBytes;
-    //if(D_G){printf("CSparse\n");} //DEBUG
-    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
-    ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
-    outBuf[0]=2; //mode
-    
-    ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes.
-    //outBuf[13]=patternBits;
-    //outBuf[14]=ECQBits;
-    ////Currently, we are at the end of 15th byte.
-    //*(uint16_t*)(&outBuf[15])=numOutliers;
-    //bitPos=17*8; //Currently, we are at the end of 17th byte.
-    
-    //outBuf bytes [1:4] are reserved for compressedBytes.
-    outBuf[5]=bp->patternBits;
-    outBuf[6]=bp->ECQBits;
-    //Currently, we are at the end of 7th byte.
-    
-    *(uint16_t*)(&outBuf[7])=bp->numOutliers; 
-    //Now, we are at the end of 9th byte.
-    bitPos=9*8; 
-    
-    ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
-
-    for(i=0;i<p->sbSize;i++){
-      writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
-    }
-    ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
-    for(i=0;i<p->sbNum;i++){
-      writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
-    }
-    ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
-    ////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
-    switch(bp->ECQBits){
-      case 2:
-        for(i=0;i<p->bSize;i++){
-          switch(ECQ[i]){
-            case 0:
-              break;
-            case 1:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG
-              writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-              //writeBits_Fast(outBuf,&bitPos,2,0x10);
-              //writeBits_Fast(outBuf,&bitPos,2,0);//0x00
-              //writeBits_Fast(outBuf,&bitPos,2,0);//0x00
-              writeBits_Fast(outBuf,&bitPos,1,0);//0x00
-              break;
-            case -1:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
-              writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-              //writeBits_Fast(outBuf,&bitPos,2,0x11);
-              //writeBits_Fast(outBuf,&bitPos,2,1);//0x01
-              //writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,1);
-              break;
-            default:
-              assert(0);
-              break;
-          }
-        }
-        break;
-      default: //ECQBits>2
-      for(i=0;i<p->bSize;i++){
-        switch(ECQ[i]){
-          case 0:
-            break;
-          case 1:
-            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG
-            writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-            //writeBits_Fast(outBuf,&bitPos,3,0);//0x000
-            //writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,0);
-            break;
-          case -1:
-            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG
-            writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-            //writeBits_Fast(outBuf,&bitPos,3,1);//0x001
-            //writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,1);
-            break;
-          default:
-            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG
-            writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-            //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]);
-            //writeBits_Fast(outBuf,&bitPos,2+ECQBits,(ECQ[i]&((uint64_t)0x00<<ECQBits))|((uint64_t)0x01<<ECQBits));
-            //writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,1);
-            writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
-            break;
-        }
-      }
-      break;
-    }
-    
-    ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
-    //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
-          
-
-    uint32_t bytePos=(bitPos+7)/8;
-    //*(uint32_t*)(&outBuf[9])=bytePos;
-    *(uint32_t*)(&outBuf[1])=bytePos;
-    
-    //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG
-    if(D_G){assert(bitPos==CSparseBits);}
-    
-  //****************************************************************************************
-  //W:CNonSparse
-  }else { 
-    //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
-    *numOutBytes=CNonSparseBytes;
-    //if(D_G){printf("CNonSparse\n");} //DEBUG
-    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
-    ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
-    outBuf[0]=3; //mode
-    
-    ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes.
-    //outBuf[13]=patternBits;
-    //outBuf[14]=ECQBits;
-    //bitPos=15*8; //Currently, we are at the end of 15th byte.
-    
-    //outBuf bytes [1:4] are reserved for compressedBytes.
-    outBuf[5]=bp->patternBits;
-    outBuf[6]=bp->ECQBits;
-    bitPos=7*8; //Currently, we are at the end of 7th byte.
-    
-    ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
-
-    for(i=0;i<p->sbSize;i++){
-      writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
-    }
-    ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
-    for(i=0;i<p->sbNum;i++){
-      writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
-    }
-    ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
-    ////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
-    switch(bp->ECQBits){
-      case 2:
-        for(i=0;i<p->bSize;i++){
-          switch(ECQ[i]){
-            case 0:
-              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG
-              writeBits_Fast(outBuf,&bitPos,1,1);//0x1
-              break;
-            case 1:
-              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG
-              //writeBits_Fast(outBuf,&bitPos,2,0);//0x00
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              break;
-            case -1:
-              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG
-              //writeBits_Fast(outBuf,&bitPos,2,2); //0x01
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,1);
-              break;
-            default:
-              assert(0);
-              break;
-          }
-        }
-        break;
-      default: //ECQBits>2
-        ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG
-        for(i=0;i<p->bSize;i++){
-          ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-          ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG
-          ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG
-          switch(ECQ[i]){
-            case 0:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
-              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-              //temp1=bitPos;
-              writeBits_Fast(outBuf,&bitPos,1,1);  //0x1
-              //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1
-              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
-              break;
-            case 1:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG
-              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-              //temp1=bitPos;
-              //writeBits_Fast(outBuf,&bitPos,3,0); //0x000
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000
-              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
-              break;
-            case -1:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG
-              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-              //temp1=bitPos;
-              //writeBits_Fast(outBuf,&bitPos,3,8); //0x001
-              writeBits_Fast(outBuf,&bitPos,1,0); 
-              writeBits_Fast(outBuf,&bitPos,1,0); 
-              writeBits_Fast(outBuf,&bitPos,1,1); 
-              //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001
-              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
-              break;
-            default:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG
-              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-              //temp1=bitPos;
-              //writeBits_Fast(outBuf,&bitPos,2,2); //0x01
-              writeBits_Fast(outBuf,&bitPos,1,0); 
-              writeBits_Fast(outBuf,&bitPos,1,1); 
-              //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01
-              writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
-              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
-              break;
-          }
-        }
-        break;
-    }
-    
-    ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
-    //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
-    
-          
-
-    uint32_t bytePos=(bitPos+7)/8;
-    //*(uint32_t*)(&outBuf[9])=bytePos;
-    *(uint32_t*)(&outBuf[1])=bytePos;
-    
-    //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG
-    if(D_G){assert(bitPos==CNonSparseBits);}
-    
-  }
-  ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG
-  
-}
-static inline int pastri_double_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){
-  pastri_blockParams bp;
-
-  //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);}  //DEBUG
-  //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);}  //DEBUG
-  //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG
-  //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG
-  
-  int64_t patternQ[MAX_PS_SIZE];
-  int64_t scalesQ[MAX_PS_SIZE];
-  int64_t ECQ[MAX_BLOCK_SIZE];
-
-  double *data;
-  data=(double*)inBuf;
-  
-  //STEP 0: PREPROCESSING:
-  //This step can include flattening the block, determining the period, etc.
-  //Currently not needed.
-  
-  //STEP 1: PATTERN MATCH
-  pastri_double_PatternMatch(data,p,&bp,patternQ,scalesQ,ECQ);
-  
-  //STEP 2: ENCODING(Include QUANTIZE)
-  pastri_double_Encode(data,patternQ,scalesQ,ECQ,p,&bp,outBuf,numOutBytes);
-  
-
-  return 0;
-}
-
-static inline double pastri_double_InverseQuantization(int64_t q, double binSize){
-  return q*binSize;
-}
-
-static inline void pastri_double_PredictData(pastri_params *p,pastri_blockParams *bp,double *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){
-  int j;
-  double PS_binSize=bp->scalesBinSize*bp->binSize;
-  for(j=0;j<p->bSize;j++){
-    //data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*PS_binSize - ECQ[j]*bp->binSize;
-    data[j]=pastri_double_InverseQuantization(scalesQ[j/p->sbSize]*patternQ[j%p->sbSize],PS_binSize) - pastri_double_InverseQuantization(ECQ[j],bp->binSize);
-  }
-}
-
-static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pastri_blockParams *bp,unsigned char*outBuf,int *numReadBytes,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){
-  int j;
-  bp->_1DIdxBits=bitsNeeded_UI64(p->bSize);
-  //double *data=(double*)(outBuf+p->bSize*8);
-  double *data=(double*)(outBuf);
-  int i0,i1,i2,i3;
-  //uint16_t *idx0,*idx1,*idx2,*idx3;
-  int _1DIdx;
-
-  int64_t ECQTemp;
-  uint64_t bytePos=0;
-  uint64_t bitPos=0;
-  uint64_t temp,temp2;
-  //int sb,localIdx;
-
-  
-  //idx0=(uint16_t*)(outBuf           );
-  //idx1=(uint16_t*)(outBuf+p->bSize*2);
-  //idx2=(uint16_t*)(outBuf+p->bSize*4);
-  //idx3=(uint16_t*)(outBuf+p->bSize*6);
-  //p->idxOffset[0]=*(uint32_t*)(&inBuf[1]);
-  //p->idxOffset[1]=*(uint32_t*)(&inBuf[3]);
-  //p->idxOffset[2]=*(uint32_t*)(&inBuf[5]);
-  //p->idxOffset[3]=*(uint32_t*)(&inBuf[7]);
-  /*
-  for(i0=0;i0<p->idxRange[0];i0++)
-    for(i1=0;i1<p->idxRange[1];i1++)
-      for(i2=0;i2<p->idxRange[2];i2++)
-        for(i3=0;i3<p->idxRange[3];i3++){
-            //_1DIdx=i0*p->idxRange[1]*p->idxRange[2]*p->idxRange[3]+i1*p->idxRange[2]*p->idxRange[3]+i2*p->idxRange[3]+i3;
-            _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
-            idx0[_1DIdx]=i0+1+p->idxOffset[0];
-            idx1[_1DIdx]=i1+1+p->idxOffset[1];
-            idx2[_1DIdx]=i2+1+p->idxOffset[2];
-            idx3[_1DIdx]=i3+1+p->idxOffset[3];
-        }
-  */
-  
-  //*numOutBytes=p->bSize*16;  
-  
-  //inBuf[0] is "mode"
-  switch(inBuf[0]){
-    //R:UCSparse
-    case 0:
-      //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG
-      //bp->nonZeros=*(uint16_t*)(&inBuf[9]);
-      //bytePos=11;
-      bp->nonZeros=*(uint16_t*)(&inBuf[1]);
-      bytePos=3;
-      for(j=0;j<p->bSize;j++){
-          data[j]=0;
-      }
-      for(j=0;j<bp->nonZeros;j++){
-        //i0=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[0]; //i0
-        i0=*(uint16_t*)(&inBuf[bytePos]); //i0
-        bytePos+=2;
-        //i1=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[1]; //i1
-        i1=*(uint16_t*)(&inBuf[bytePos]); //i1
-        bytePos+=2;
-        //i2=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[2]; //i2
-        i2=*(uint16_t*)(&inBuf[bytePos]); //i2
-        bytePos+=2;
-        //i3=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[3]; //i3
-        i3=*(uint16_t*)(&inBuf[bytePos]); //i3
-        bytePos+=2;
-        _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
-        data[_1DIdx]=*(double*)(&inBuf[bytePos]);
-        bytePos+=8; 
-      }
-      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
-      break;
-    //R:UCNonSparse
-    case 1:
-      //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG
-      //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8);
-      memcpy(data, &inBuf[1], p->bSize*8);
-      bytePos=p->bSize*8;
-      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
-      break;
-    //R:CSparse
-    case 2:
-      //if(D_G){printf("\nDC:CSparse\n");} //DEBUG
-      //for(j=0;j<p->bSize;j++){
-      //  data[j]=0;
-      //}
-      
-      //bp->patternBits=inBuf[13];
-      //bp->ECQBits=inBuf[14];      
-      
-      bp->patternBits=inBuf[5];
-      bp->ECQBits=inBuf[6];
-      
-      //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
-      
-      //bp->numOutliers=*(uint16_t*)(&inBuf[15]);
-      //bitPos=17*8;
-      bp->numOutliers=*(uint16_t*)(&inBuf[7]);
-      bitPos=9*8;
-      //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG
-
-      bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1);
-  
-      bp->binSize=p->usedEb*2;
-      
-      //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
-
-      for(j=0;j<p->sbSize;j++){
-        patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
-        //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
-      }
-      for(j=0;j<p->sbNum;j++){
-        scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
-        //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
-      }
-      
-      /* //Splitting
-      for(j=0;j<p->bSize;j++){
-        data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
-      }
-      */
-      for(j=0;j<p->bSize;j++){
-        ECQ[j]=0;
-      }
-      switch(bp->ECQBits){
-        case 2:
-          for(j=0;j<bp->numOutliers;j++){
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
-            
-            _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
-            ECQTemp=readBits_I64(inBuf,&bitPos,1);
-            ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
-            ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
-            //continue;
-            //sb=_1DIdx/p->sbSize; 
-            //localIdx=_1DIdx%p->sbSize;
-            
-            ////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting
-            ECQ[_1DIdx]=ECQTemp;
-            
-            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
-          }
-          break;
-        default: //bp->ECQBits>2
-          //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
-    
-          for(j=0;j<bp->numOutliers;j++){
-            _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
-            //sb=_1DIdx/p->sbSize; 
-            //localIdx=_1DIdx%p->sbSize;
-            temp=readBits_UI64(inBuf,&bitPos,1);
-            ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG
-            switch(temp){
-              case 0:  //+-1
-                ECQTemp=readBits_I64(inBuf,&bitPos,1);
-                ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
-                ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
-                ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
-                break;
-              case 1: //Others
-                ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
-                ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
-                ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
-                break;
-              //default:
-              ////  printf("ERROR: Bad 2-bit value: 0x%lx",temp);
-              // assert(0); //AMG
-              //  break;
-            }
-            
-            //data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting
-            ECQ[_1DIdx]=ECQTemp;
-            
-            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
-          }
-          break;
-      }
-      //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56]
-      //patternQ=(int64_t*)(inBuf+15); 
-      //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
-      
-      bytePos=(bitPos+7)/8;
-      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
-      
-      //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
-      pastri_double_PredictData(p,bp,data,patternQ,scalesQ,ECQ);
-
-      break;
-    //R:CNonSparse
-    case 3:
-      //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG
-      
-      //for(j=0;j<p->bSize;j++){
-      //  data[j]=0;
-      //}
-      
-      //bp->patternBits=inBuf[13];
-      //bp->ECQBits=inBuf[14];
-      
-      bp->patternBits=inBuf[5];
-      bp->ECQBits=inBuf[6];
-      
-      //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
-      
-      //bitPos=15*8;
-      bitPos=7*8;
-
-      bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1);
-      bp->binSize=p->usedEb*2;
-      
-      //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
-
-      for(j=0;j<p->sbSize;j++){
-        patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
-        //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
-      }
-      for(j=0;j<p->sbNum;j++){
-        scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
-        //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
-      }
-      /* //Splitting
-      for(j=0;j<p->bSize;j++){
-        data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
-        ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}
-      }
-      */
-      switch(bp->ECQBits){
-        case 2:
-          for(j=0;j<p->bSize;j++){
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
-            //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
-            temp=readBits_UI64(inBuf,&bitPos,1);
-            switch(temp){
-              case 0:
-                ECQTemp=readBits_I64(inBuf,&bitPos,1);
-                ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
-                break;
-              case 1:
-                ECQTemp=0;
-                break;
-              default:
-                assert(0);
-                break;
-            }
-            
-            ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
-            //continue;
-            //sb=_1DIdx/p->sbSize; 
-            //localIdx=_1DIdx%p->sbSize;
-            
-            //data[j]-=ECQTemp*bp->binSize; //Splitting
-            ECQ[j]=ECQTemp;
-            
-            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
-          }
-          break;
-        default: //bp->ECQBits>2
-          ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);
-          
-          for(j=0;j<p->bSize;j++){
-            ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
-            ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);
-
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
-            //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
-            temp=readBits_UI64(inBuf,&bitPos,1);
-            ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
-            switch(temp){
-              case 0:
-                ////if(DEBUG)printf("Read:0");
-                temp2=readBits_UI64(inBuf,&bitPos,1);
-                switch(temp2){
-                  case 0:
-                    ////if(DEBUG)printf("0");
-                    ECQTemp=readBits_I64(inBuf,&bitPos,1);
-                    ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
-                    ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);
-                    ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
-                    ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
-                    break;
-                  case 1:
-                    ////if(DEBUG)printf("1\n");
-                    ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
-                    ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
-                    ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
-                    break;
-                  default:
-                    assert(0);
-                    break;
-                }
-                break;
-              case 1:
-                ////if(DEBUG)printf("Read:1\n");
-                ECQTemp=0;
-                ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
-                break;
-              default:
-                assert(0);
-                break;
-            }
-            
-            ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
-            //continue;
-            //sb=_1DIdx/p->sbSize; 
-            //localIdx=_1DIdx%p->sbSize;
-            
-            //data[j]-=ECQTemp*bp->binSize; //Splitting
-            ECQ[j]=ECQTemp;
-            
-            ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG
-          }
-          break;
-      }
-      //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56]
-      //patternQ=(int64_t*)(inBuf+15); 
-      //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
-      bytePos=(bitPos+7)/8;
-      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
-      
-      //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
-      pastri_double_PredictData(p,bp,data,patternQ,scalesQ,ECQ);
-      break;
-      
-    default:
-      assert(0);
-      break;
-  } 
-  (*numReadBytes)=bytePos;
-}
-
-static inline void pastri_double_Decompress(unsigned char*inBuf,int dataSize,pastri_params *p,unsigned char*outBuf,int *numReadBytes){
-  int64_t patternQ[MAX_PS_SIZE]; 
-  int64_t scalesQ[MAX_PS_SIZE];
-  int64_t ECQ[MAX_BLOCK_SIZE];
-  
-  pastri_blockParams bp;
-  
-  //STEP 1: DECODE (Includes PREDICT DATA(Includes INVERSE QUANTIZATION))
-  //(Further steps are called inside pastri_double_Decode function)
-  pastri_double_Decode(inBuf,p,&bp,outBuf,numReadBytes,patternQ,scalesQ,ECQ);
-
-  return;
-}
-
-//inBuf vs Decompressed
-static inline int pastri_double_Check(unsigned char*inBuf,int dataSize,unsigned char*DC,pastri_params *p){
-  int i;
-  
-  double *data=(double*)(inBuf);
-  double *data_dc=(double*)(DC);
-  
-  //Comparing Indexes:
-  /*
-  for(i=0;i<p->bSize;i++){
-    if(idx0[i]!=idx0_dc[i]){
-      //printf("idx0[%d]=%d  !=  %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);
-      assert(0);
-    }
-    if(idx1[i]!=idx1_dc[i]){
-      //printf("idx1[%d]=%d  !=  %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);
-      assert(0);
-    }
-    if(idx2[i]!=idx2_dc[i]){
-      //printf("idx2[%d]=%d  !=  %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);
-      assert(0);
-    }
-    if(idx3[i]!=idx3_dc[i]){
-      //printf("idx3[%d]=%d  !=  %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);
-      assert(0);
-    }
-  }
-  */
-  
-  //Comparing Data:
-  for(i=0;i<p->bSize;i++){
-    if(abs_FastD(data[i]-data_dc[i])>p->usedEb){
-      //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);
-      assert(0);
-    }
-  }
-  return 0;
-}
-
-
-#endif
--- a/deps/SZ/sz/include/pastriF.h
+++ b/deps/SZ/sz/include/pastriF.h
@ -1,911 +0,0 @@
-#ifndef PASTRIF_H
-#define PASTRIF_H
-
-static inline int64_t pastri_float_quantize(float x, float binSize){
-  //Add or sub 0.5, depending on the sign:
-  x=x/binSize;
-  
-  u_UI64I64D u1,half;
-  u1.d=x;
-  
-  half.d=0.5;
-  
-  ////printf("pastri_float_quantize:\nx=%lf  x=0x%lx\n",x,(*((uint64_t *)(&x))));
-  ////printf("sign(x):0x%lx\n", x);
-  ////printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));
-  half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000);
-  ////printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));
-  return (int64_t)(x + half.d);
-}
-
-static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_blockParams* bp,int64_t* patternQ,int64_t *scalesQ, int64_t* ECQ){
-  //Find the pattern.
-  //First, find the extremum point:
-  float absExt=0; //Absolute value of Extremum
-  int extIdx=-1; //Index of Extremum
-  bp->nonZeros=0;
-  int i,sb;
-  for(i=0;i<p->bSize;i++){
-    ////printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG
-    if(abs_FastD(data[i])>p->usedEb){
-      bp->nonZeros++;
-      ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG
-    }
-    if(abs_FastD(data[i])>absExt){
-      absExt=abs_FastD(data[i]);
-      extIdx=i;
-    }
-  }
-  int patternIdx; //Starting Index of Pattern
-  patternIdx=(extIdx/p->sbSize)*p->sbSize;
-  
-  float patternExt=data[extIdx];
-  bp->binSize=2*p->usedEb;
-  
-  ////if(DEBUG){printf("Extremum  : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG
-  ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG
-  
-  ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize)  );}   }//DEBUG
-  
-  //int64_t *patternQ=(int64_t*)(outBuf+15);  //Possible Improvement!
-
-  
-  for(i=0;i<p->sbSize;i++){
-    patternQ[i]=pastri_float_quantize(data[patternIdx+i],bp->binSize);
-    //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}
-  }
-  
-  bp->patternBits=bitsNeeded_float((abs_FastD(patternExt)/bp->binSize)+1)+1;
-  bp->scaleBits=bp->patternBits;
-  bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->scaleBits-1))-1);
-  ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG
-  ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG
-  //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG
-  
-  //Calculate Scales.
-  //The index part of the input buffer will be reused to hold Scale, Pattern, etc. values.
-  int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum.
-  //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8);  //Possible Improvement!
-  int patternExtZero=(patternExt==0);
-  ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG
-  for(sb=0;sb<p->sbNum;sb++){
-    //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt;
-    //scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt;
-    //assert(scales[sb]<=1);
-    scalesQ[sb]=pastri_float_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize);
-    //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}
-  }
-  ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG
-
-  //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it.
-
-  //uint64_t wVal;
-  bp->ECQExt=0;
-  int _1DIdx;
-  bp->ECQ1s=0;
-  bp->ECQOthers=0;
-  float PS_binSize=bp->scalesBinSize*bp->binSize;
-  for(sb=0;sb<p->sbNum;sb++){
-    for(i=0;i<p->sbSize;i++){
-      _1DIdx=sb*p->sbSize+i;
-      ECQ[_1DIdx]=pastri_float_quantize( (scalesQ[sb]*patternQ[i]*PS_binSize-data[_1DIdx]),bp->binSize );
-      float absECQ=abs_FastD(ECQ[_1DIdx]);
-      if(absECQ > bp->ECQExt)
-        bp->ECQExt=absECQ;
-      ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG
-      switch (ECQ[_1DIdx]){
-        case 0:
-          //ECQ0s++; //Currently not needed
-          break;
-        case 1:
-          bp->ECQ1s++;
-          break;
-        case -1:
-          bp->ECQ1s++;
-          break;
-        default:
-          bp->ECQOthers++;
-          break;
-      }
-    }
-  }
-  
-  /*
-  //DEBUG: Self-check. Remove this later.
-  for(sb=0;sb<p->sbNum;sb++){
-    for(i=0;i<p->sbSize;i++){
-      _1DIdx=sb*p->sbSize+i;
-      float decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize;
-      if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){
-        //printf("p->usedEb=%.6e\n",p->usedEb);
-        //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));
-        assert(0);
-      }
-    }
-  }
-  */
-}
-
-static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ,pastri_params *p,pastri_blockParams* bp,unsigned char* outBuf,int *numOutBytes){
-  bp->ECQBits=bitsNeeded_UI64(bp->ECQExt)+1;
-  bp->_1DIdxBits=bitsNeeded_UI64(p->bSize);
-  //(*numOutBytes)=0;
-  
-  int i;
-  
-  //Encode: 3 options:
-  //Compressed, Sparse ECQ
-  //Compressed, Non-Sparse ECQ
-  //Uncompressed, Sparse Data
-  //Uncompressed, Non-spsarse Data
-  
-  unsigned int UCSparseBits;  //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, nonZeros, {indexes, data}
-  unsigned int UCNonSparseBits;  //Uncompressed, NonSparse bits. Includes: mode, data
-  unsigned int CSparseBits;  //Includes: mode, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
-  unsigned int CNonSparseBits;  //Includes: mode, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
-  //int BOOKKEEPINGBITS=120; //Includes: mode, compressedBytes, patternBits, ECQBits (8+64+32+8+8) //Moved to much earlier!
-    
-  //Consider: ECQ0s, ECQ1s, ECQOthers. Number of following values in ECQ: {0}, {1,-1}, { val<=-2, val>=2}
-  //ECQ0s is actually not needed, but others are needed.
-
-  UCSparseBits = p->dataSize*(1 + 2 + bp->nonZeros*16);  //64 bits for 4 indexes, 64 bit for data.
-  UCNonSparseBits = p->dataSize*(1 + p->bSize*8);
-  bp->numOutliers=bp->ECQ1s+bp->ECQOthers;
-  if(bp->ECQBits==2){
-    CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(1+bp->_1DIdxBits);
-    CNonSparseBits = p->dataSize*(1+4+1+1) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s ;  //Or: ECQ0s+ECQ1s*2;
-  }else{ //ECQBits>2
-    CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(2+bp->_1DIdxBits) + bp->ECQOthers*(1+bp->_1DIdxBits+bp->ECQBits);
-    //CNonSparseBits = 8+32+8+8+ patternBits*p->sbSize + scaleBits*p->sbNum + p->bSize + ECQ0s + ECQ1s*3 + ECQOthers*(2+ECQBits);
-    CNonSparseBits = p->dataSize*(1+4+1+1)+ bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s*2 + bp->ECQOthers*(1+bp->ECQBits);
-  }
-  
-  int UCSparseBytes=(UCSparseBits+7)/8; 
-  int UCNonSparseBytes=(UCNonSparseBits+7)/8; 
-  int CSparseBytes=(CSparseBits+7)/8; 
-  int CNonSparseBytes=(CNonSparseBits+7)/8; 
-  uint64_t bitPos=0;
-  uint64_t bytePos=0;
-  int i0,i1,i2,i3;
-  int _1DIdx;
-  
-  //*(uint16_t*)(&outBuf[1])=p->idxOffset[0];
-  //*(uint16_t*)(&outBuf[3])=p->idxOffset[1];
-  //*(uint16_t*)(&outBuf[5])=p->idxOffset[2];
-  //*(uint16_t*)(&outBuf[7])=p->idxOffset[3];
-    
-  //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG
-  //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG
-  
-  //****************************************************************************************
-  //if(0){ //DEBUG
-  //W:UCSparse
-  if((UCSparseBytes<UCNonSparseBytes) && (UCSparseBytes<CSparseBytes) && (UCSparseBytes<CNonSparseBytes) ){ 
-    //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data
-    *numOutBytes=UCSparseBytes;
-    //if(D_G){printf("UCSparse\n");} //DEBUG
-    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
-    outBuf[0]=0; //mode
-    
-    //*(uint16_t*)(&outBuf[9])=nonZeros;
-    //bytePos=11;//0:mode, 1-8:indexOffsets 9-10:NonZeros. So start from 11.
-    *(uint16_t*)(&outBuf[1])=bp->nonZeros;
-    bytePos=3;//0:mode, 2-3:NonZeros. So start from 3.
-    
-    for(i0=0;i0<p->idxRange[0];i0++)
-      for(i1=0;i1<p->idxRange[1];i1++)
-        for(i2=0;i2<p->idxRange[2];i2++)
-          for(i3=0;i3<p->idxRange[3];i3++){
-            _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
-            if(abs_FastD(data[_1DIdx])>p->usedEb){
-              //*(uint16_t*)(&outBuf[bytePos])=i0+1+p->idxOffset[0];
-              *(uint16_t*)(&outBuf[bytePos])=i0;
-              bytePos+=2;
-              //*(uint16_t*)(&outBuf[bytePos])=i1+1+p->idxOffset[1];
-              *(uint16_t*)(&outBuf[bytePos])=i1;
-              bytePos+=2;
-              //*(uint16_t*)(&outBuf[bytePos])=i2+1+p->idxOffset[2];
-              *(uint16_t*)(&outBuf[bytePos])=i2;
-              bytePos+=2;
-              //*(uint16_t*)(&outBuf[bytePos])=i3+1+p->idxOffset[3];
-              *(uint16_t*)(&outBuf[bytePos])=i3;
-              bytePos+=2;
-              
-              *(float*)(&outBuf[bytePos])=data[_1DIdx];
-              bytePos+=p->dataSize;
-            }
-          }
-    
-    //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG
-    
-  //****************************************************************************************
-  //}else if(0){ //DEBUG
-  //W:UCNonSparse
-  }else if((UCNonSparseBytes<UCSparseBytes) && (UCNonSparseBytes<CSparseBytes) && (UCNonSparseBytes<CNonSparseBytes) ){ 
-    //Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data
-    *numOutBytes=UCNonSparseBytes;
-    //if(D_G){printf("UCNonSparse\n");} //DEBUG
-    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
-    outBuf[0]=1; //mode
-    
-    //memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9);
-    memcpy(&outBuf[1], data, p->bSize*p->dataSize);
-    
-    //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG
-    /*
-    for(i=0;i<UCNonSparseBytes-17;i++){
-      //printf("%d ",inBuf[p->bSize*8+i]);
-    }
-    //printf("\n");
-    for(i=0;i<UCNonSparseBytes-17;i++){
-      //printf("%d ",outBuf[17+i]);
-    }
-    //printf("\n");
-    */
-  //****************************************************************************************
-  //}else if(1){ //DEBUG
-  //W:CSparse
-  }else if((CSparseBytes<UCNonSparseBytes) && (CSparseBytes<UCSparseBytes) && (CSparseBytes<CNonSparseBytes) ){ 
-    //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
-    *numOutBytes=CSparseBytes;
-    //if(D_G){printf("CSparse\n");} //DEBUG
-    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
-    ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
-    outBuf[0]=2; //mode
-    
-    ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes.
-    //outBuf[13]=patternBits;
-    //outBuf[14]=ECQBits;
-    ////Currently, we are at the end of 15th byte.
-    //*(uint16_t*)(&outBuf[15])=numOutliers;
-    //bitPos=17*8; //Currently, we are at the end of 17th byte.
-    
-    //outBuf bytes [1:4] are reserved for compressedBytes.
-    outBuf[5]=bp->patternBits;
-    outBuf[6]=bp->ECQBits;
-    //Currently, we are at the end of 7th byte.
-    
-    *(uint16_t*)(&outBuf[7])=bp->numOutliers; 
-    //Now, we are at the end of 9th byte.
-    bitPos=9*8; 
-    
-    ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
-
-    for(i=0;i<p->sbSize;i++){
-      writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
-    }
-    ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
-    for(i=0;i<p->sbNum;i++){
-      writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
-    }
-    ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
-    ////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
-    switch(bp->ECQBits){
-      case 2:
-        for(i=0;i<p->bSize;i++){
-          switch(ECQ[i]){
-            case 0:
-              break;
-            case 1:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG
-              writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-              //writeBits_Fast(outBuf,&bitPos,2,0x10);
-              //writeBits_Fast(outBuf,&bitPos,2,0);//0x00
-              //writeBits_Fast(outBuf,&bitPos,2,0);//0x00
-              writeBits_Fast(outBuf,&bitPos,1,0);//0x00
-              break;
-            case -1:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
-              writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-              //writeBits_Fast(outBuf,&bitPos,2,0x11);
-              //writeBits_Fast(outBuf,&bitPos,2,1);//0x01
-              //writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,1);
-              break;
-            default:
-              assert(0);
-              break;
-          }
-        }
-        break;
-      default: //ECQBits>2
-      for(i=0;i<p->bSize;i++){
-        switch(ECQ[i]){
-          case 0:
-            break;
-          case 1:
-            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG
-            writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-            //writeBits_Fast(outBuf,&bitPos,3,0);//0x000
-            //writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,0);
-            break;
-          case -1:
-            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG
-            writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-            //writeBits_Fast(outBuf,&bitPos,3,1);//0x001
-            //writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,1);
-            break;
-          default:
-            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG
-            writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
-            //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]);
-            //writeBits_Fast(outBuf,&bitPos,2+ECQBits,(ECQ[i]&((uint64_t)0x00<<ECQBits))|((uint64_t)0x01<<ECQBits));
-            //writeBits_Fast(outBuf,&bitPos,1,0);
-            writeBits_Fast(outBuf,&bitPos,1,1);
-            writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
-            break;
-        }
-      }
-      break;
-    }
-    
-    ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
-    //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
-          
-
-    uint32_t bytePos=(bitPos+7)/8;
-    //*(uint32_t*)(&outBuf[9])=bytePos;
-    *(uint32_t*)(&outBuf[1])=bytePos;
-    
-    //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG
-    if(D_G){assert(bitPos==CSparseBits);}
-    
-  //****************************************************************************************
-  //W:CNonSparse
-  }else { 
-    //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
-    *numOutBytes=CNonSparseBytes;
-    //if(D_G){printf("CNonSparse\n");} //DEBUG
-    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
-    ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
-    outBuf[0]=3; //mode
-    
-    ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes.
-    //outBuf[13]=patternBits;
-    //outBuf[14]=ECQBits;
-    //bitPos=15*8; //Currently, we are at the end of 15th byte.
-    
-    //outBuf bytes [1:4] are reserved for compressedBytes.
-    outBuf[5]=bp->patternBits;
-    outBuf[6]=bp->ECQBits;
-    bitPos=7*8; //Currently, we are at the end of 7th byte.
-    
-    ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
-
-    for(i=0;i<p->sbSize;i++){
-      writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
-    }
-    ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
-    for(i=0;i<p->sbNum;i++){
-      writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
-    }
-    ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
-    ////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
-    switch(bp->ECQBits){
-      case 2:
-        for(i=0;i<p->bSize;i++){
-          switch(ECQ[i]){
-            case 0:
-              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG
-              writeBits_Fast(outBuf,&bitPos,1,1);//0x1
-              break;
-            case 1:
-              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG
-              //writeBits_Fast(outBuf,&bitPos,2,0);//0x00
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              break;
-            case -1:
-              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG
-              //writeBits_Fast(outBuf,&bitPos,2,2); //0x01
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,1);
-              break;
-            default:
-              assert(0);
-              break;
-          }
-        }
-        break;
-      default: //ECQBits>2
-        ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG
-        for(i=0;i<p->bSize;i++){
-          ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-          ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG
-          ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG
-          switch(ECQ[i]){
-            case 0:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
-              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-              //temp1=bitPos;
-              writeBits_Fast(outBuf,&bitPos,1,1);  //0x1
-              //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1
-              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
-              break;
-            case 1:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG
-              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-              //temp1=bitPos;
-              //writeBits_Fast(outBuf,&bitPos,3,0); //0x000
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              writeBits_Fast(outBuf,&bitPos,1,0);
-              //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000
-              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
-              break;
-            case -1:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG
-              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-              //temp1=bitPos;
-              //writeBits_Fast(outBuf,&bitPos,3,8); //0x001
-              writeBits_Fast(outBuf,&bitPos,1,0); 
-              writeBits_Fast(outBuf,&bitPos,1,0); 
-              writeBits_Fast(outBuf,&bitPos,1,1); 
-              //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001
-              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
-              break;
-            default:
-              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG
-              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
-              //temp1=bitPos;
-              //writeBits_Fast(outBuf,&bitPos,2,2); //0x01
-              writeBits_Fast(outBuf,&bitPos,1,0); 
-              writeBits_Fast(outBuf,&bitPos,1,1); 
-              //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01
-              writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
-              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
-              break;
-          }
-        }
-        break;
-    }
-    
-    ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
-    //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
-    
-          
-
-    uint32_t bytePos=(bitPos+7)/8;
-    //*(uint32_t*)(&outBuf[9])=bytePos;
-    *(uint32_t*)(&outBuf[1])=bytePos;
-    
-    //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG
-    if(D_G){assert(bitPos==CNonSparseBits);}
-    
-  }
-  ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG
-  
-}
-static inline int pastri_float_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){
-  pastri_blockParams bp;
-
-  //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);}  //DEBUG
-  //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);}  //DEBUG
-  //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG
-  //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG
-  
-  int64_t patternQ[MAX_PS_SIZE];
-  int64_t scalesQ[MAX_PS_SIZE];
-  int64_t ECQ[MAX_BLOCK_SIZE];
-
-  float *data;
-  data=(float*)inBuf;
-  
-  //STEP 0: PREPROCESSING:
-  //This step can include flattening the block, determining the period, etc.
-  //Currently not needed.
-  
-  //STEP 1: PATTERN MATCH
-  pastri_float_PatternMatch(data,p,&bp,patternQ,scalesQ,ECQ);
-  
-  //STEP 2: ENCODING(Include QUANTIZE)
-  pastri_float_Encode(data,patternQ,scalesQ,ECQ,p,&bp,outBuf,numOutBytes);
-  
-
-  return 0;
-}
-
-static inline float pastri_float_InverseQuantization(int64_t q, float binSize){
-  return q*binSize;
-}
-
-static inline void pastri_float_PredictData(pastri_params *p,pastri_blockParams *bp,float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){
-  int j;
-  float PS_binSize=bp->scalesBinSize*bp->binSize;
-  for(j=0;j<p->bSize;j++){
-    //data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*PS_binSize - ECQ[j]*bp->binSize;
-    data[j]=pastri_float_InverseQuantization(scalesQ[j/p->sbSize]*patternQ[j%p->sbSize],PS_binSize) - pastri_float_InverseQuantization(ECQ[j],bp->binSize);
-  }
-}
-
-static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,pastri_blockParams *bp,unsigned char*outBuf,int *numReadBytes,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){
-  int j;
-  bp->_1DIdxBits=bitsNeeded_UI64(p->bSize);
-  //float *data=(float*)(outBuf+p->bSize*8);
-  float *data=(float*)(outBuf);
-  int i0,i1,i2,i3;
-  //uint16_t *idx0,*idx1,*idx2,*idx3;
-  int _1DIdx;
-
-  int64_t ECQTemp;
-  uint64_t bytePos=0;
-  uint64_t bitPos=0;
-  uint64_t temp,temp2;
-  //int sb,localIdx;
-
-  
-  //idx0=(uint16_t*)(outBuf           );
-  //idx1=(uint16_t*)(outBuf+p->bSize*2);
-  //idx2=(uint16_t*)(outBuf+p->bSize*4);
-  //idx3=(uint16_t*)(outBuf+p->bSize*6);
-  //p->idxOffset[0]=*(uint32_t*)(&inBuf[1]);
-  //p->idxOffset[1]=*(uint32_t*)(&inBuf[3]);
-  //p->idxOffset[2]=*(uint32_t*)(&inBuf[5]);
-  //p->idxOffset[3]=*(uint32_t*)(&inBuf[7]);
-  /*
-  for(i0=0;i0<p->idxRange[0];i0++)
-    for(i1=0;i1<p->idxRange[1];i1++)
-      for(i2=0;i2<p->idxRange[2];i2++)
-        for(i3=0;i3<p->idxRange[3];i3++){
-            //_1DIdx=i0*p->idxRange[1]*p->idxRange[2]*p->idxRange[3]+i1*p->idxRange[2]*p->idxRange[3]+i2*p->idxRange[3]+i3;
-            _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
-            idx0[_1DIdx]=i0+1+p->idxOffset[0];
-            idx1[_1DIdx]=i1+1+p->idxOffset[1];
-            idx2[_1DIdx]=i2+1+p->idxOffset[2];
-            idx3[_1DIdx]=i3+1+p->idxOffset[3];
-        }
-  */
-  
-  //*numOutBytes=p->bSize*16;  
-  
-  //inBuf[0] is "mode"
-  switch(inBuf[0]){
-    //R:UCSparse
-    case 0:
-      //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG
-      //bp->nonZeros=*(uint16_t*)(&inBuf[9]);
-      //bytePos=11;
-      bp->nonZeros=*(uint16_t*)(&inBuf[1]);
-      bytePos=3;
-      for(j=0;j<p->bSize;j++){
-          data[j]=0;
-      }
-      for(j=0;j<bp->nonZeros;j++){
-        //i0=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[0]; //i0
-        i0=*(uint16_t*)(&inBuf[bytePos]); //i0
-        bytePos+=2;
-        //i1=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[1]; //i1
-        i1=*(uint16_t*)(&inBuf[bytePos]); //i1
-        bytePos+=2;
-        //i2=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[2]; //i2
-        i2=*(uint16_t*)(&inBuf[bytePos]); //i2
-        bytePos+=2;
-        //i3=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[3]; //i3
-        i3=*(uint16_t*)(&inBuf[bytePos]); //i3
-        bytePos+=2;
-        _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
-        data[_1DIdx]=*(float*)(&inBuf[bytePos]);
-        bytePos+=8; 
-      }
-      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
-      break;
-    //R:UCNonSparse
-    case 1:
-      //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG
-      //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8);
-      memcpy(data, &inBuf[1], p->bSize*8);
-      bytePos=p->bSize*8;
-      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
-      break;
-    //R:CSparse
-    case 2:
-      //if(D_G){printf("\nDC:CSparse\n");} //DEBUG
-      //for(j=0;j<p->bSize;j++){
-      //  data[j]=0;
-      //}
-      
-      //bp->patternBits=inBuf[13];
-      //bp->ECQBits=inBuf[14];      
-      
-      bp->patternBits=inBuf[5];
-      bp->ECQBits=inBuf[6];
-      
-      //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
-      
-      //bp->numOutliers=*(uint16_t*)(&inBuf[15]);
-      //bitPos=17*8;
-      bp->numOutliers=*(uint16_t*)(&inBuf[7]);
-      bitPos=9*8;
-      //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG
-
-      bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1);
-  
-      bp->binSize=p->usedEb*2;
-      
-      //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
-
-      for(j=0;j<p->sbSize;j++){
-        patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
-        //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
-      }
-      for(j=0;j<p->sbNum;j++){
-        scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
-        //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
-      }
-      
-      /* //Splitting
-      for(j=0;j<p->bSize;j++){
-        data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
-      }
-      */
-      for(j=0;j<p->bSize;j++){
-        ECQ[j]=0;
-      }
-      switch(bp->ECQBits){
-        case 2:
-          for(j=0;j<bp->numOutliers;j++){
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
-            
-            _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
-            ECQTemp=readBits_I64(inBuf,&bitPos,1);
-            ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
-            ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
-            //continue;
-            //sb=_1DIdx/p->sbSize; 
-            //localIdx=_1DIdx%p->sbSize;
-            
-            ////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting
-            ECQ[_1DIdx]=ECQTemp;
-            
-            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
-          }
-          break;
-        default: //bp->ECQBits>2
-          //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
-    
-          for(j=0;j<bp->numOutliers;j++){
-            _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
-            //sb=_1DIdx/p->sbSize; 
-            //localIdx=_1DIdx%p->sbSize;
-            temp=readBits_UI64(inBuf,&bitPos,1);
-            ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG
-            switch(temp){
-              case 0:  //+-1
-                ECQTemp=readBits_I64(inBuf,&bitPos,1);
-                ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
-                ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
-                ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
-                break;
-              case 1: //Others
-                ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
-                ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
-                ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
-                break;
-              //default:
-              ////  printf("ERROR: Bad 2-bit value: 0x%lx",temp);
-              // assert(0); //AMG
-              //  break;
-            }
-            
-            //data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting
-            ECQ[_1DIdx]=ECQTemp;
-            
-            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
-          }
-          break;
-      }
-      //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56]
-      //patternQ=(int64_t*)(inBuf+15); 
-      //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
-      
-      bytePos=(bitPos+7)/8;
-      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
-      
-      //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
-      pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ);
-
-      break;
-    //R:CNonSparse
-    case 3:
-      //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG
-      
-      //for(j=0;j<p->bSize;j++){
-      //  data[j]=0;
-      //}
-      
-      //bp->patternBits=inBuf[13];
-      //bp->ECQBits=inBuf[14];
-      
-      bp->patternBits=inBuf[5];
-      bp->ECQBits=inBuf[6];
-      
-      //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
-      
-      //bitPos=15*8;
-      bitPos=7*8;
-
-      bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1);
-      bp->binSize=p->usedEb*2;
-      
-      //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
-
-      for(j=0;j<p->sbSize;j++){
-        patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
-        //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
-      }
-      for(j=0;j<p->sbNum;j++){
-        scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
-        //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
-      }
-      /* //Splitting
-      for(j=0;j<p->bSize;j++){
-        data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
-        ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}
-      }
-      */
-      switch(bp->ECQBits){
-        case 2:
-          for(j=0;j<p->bSize;j++){
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
-            //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
-            temp=readBits_UI64(inBuf,&bitPos,1);
-            switch(temp){
-              case 0:
-                ECQTemp=readBits_I64(inBuf,&bitPos,1);
-                ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
-                break;
-              case 1:
-                ECQTemp=0;
-                break;
-              default:
-                assert(0);
-                break;
-            }
-            
-            ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
-            //continue;
-            //sb=_1DIdx/p->sbSize; 
-            //localIdx=_1DIdx%p->sbSize;
-            
-            //data[j]-=ECQTemp*bp->binSize; //Splitting
-            ECQ[j]=ECQTemp;
-            
-            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
-          }
-          break;
-        default: //bp->ECQBits>2
-          ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);
-          
-          for(j=0;j<p->bSize;j++){
-            ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
-            ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);
-
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
-            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
-            //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
-            temp=readBits_UI64(inBuf,&bitPos,1);
-            ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
-            switch(temp){
-              case 0:
-                ////if(DEBUG)printf("Read:0");
-                temp2=readBits_UI64(inBuf,&bitPos,1);
-                switch(temp2){
-                  case 0:
-                    ////if(DEBUG)printf("0");
-                    ECQTemp=readBits_I64(inBuf,&bitPos,1);
-                    ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
-                    ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);
-                    ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
-                    ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
-                    break;
-                  case 1:
-                    ////if(DEBUG)printf("1\n");
-                    ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
-                    ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
-                    ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
-                    break;
-                  default:
-                    assert(0);
-                    break;
-                }
-                break;
-              case 1:
-                ////if(DEBUG)printf("Read:1\n");
-                ECQTemp=0;
-                ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
-                break;
-              default:
-                assert(0);
-                break;
-            }
-            
-            ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
-            //continue;
-            //sb=_1DIdx/p->sbSize; 
-            //localIdx=_1DIdx%p->sbSize;
-            
-            //data[j]-=ECQTemp*bp->binSize; //Splitting
-            ECQ[j]=ECQTemp;
-            
-            ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG
-          }
-          break;
-      }
-      //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56]
-      //patternQ=(int64_t*)(inBuf+15); 
-      //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
-      bytePos=(bitPos+7)/8;
-      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
-      
-      //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
-      pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ);
-      break;
-      
-    default:
-      assert(0);
-      break;
-  } 
-  (*numReadBytes)=bytePos;
-}
-
-static inline void pastri_float_Decompress(unsigned char*inBuf,int dataSize,pastri_params *p,unsigned char*outBuf,int *numReadBytes){
-  int64_t patternQ[MAX_PS_SIZE]; 
-  int64_t scalesQ[MAX_PS_SIZE];
-  int64_t ECQ[MAX_BLOCK_SIZE];
-  
-  pastri_blockParams bp;
-  
-  //STEP 1: DECODE (Includes PREDICT DATA(Includes INVERSE QUANTIZATION))
-  //(Further steps are called inside pastri_float_Decode function)
-  pastri_float_Decode(inBuf,p,&bp,outBuf,numReadBytes,patternQ,scalesQ,ECQ);
-
-  return;
-}
-
-//inBuf vs Decompressed
-static inline int pastri_float_Check(unsigned char*inBuf,int dataSize,unsigned char*DC,pastri_params *p){
-  int i;
-  
-  float *data=(float*)(inBuf);
-  float *data_dc=(float*)(DC);
-  
-  //Comparing Indexes:
-  /*
-  for(i=0;i<p->bSize;i++){
-    if(idx0[i]!=idx0_dc[i]){
-      //printf("idx0[%d]=%d  !=  %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);
-      assert(0);
-    }
-    if(idx1[i]!=idx1_dc[i]){
-      //printf("idx1[%d]=%d  !=  %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);
-      assert(0);
-    }
-    if(idx2[i]!=idx2_dc[i]){
-      //printf("idx2[%d]=%d  !=  %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);
-      assert(0);
-    }
-    if(idx3[i]!=idx3_dc[i]){
-      //printf("idx3[%d]=%d  !=  %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);
-      assert(0);
-    }
-  }
-  */
-  
-  //Comparing Data:
-  for(i=0;i<p->bSize;i++){
-    if(abs_FastD(data[i]-data_dc[i])>p->usedEb){
-      //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);
-      assert(0);
-    }
-  }
-  return 0;
-}
-
-
-#endif
--- a/deps/SZ/sz/include/pastriGeneral.h
+++ b/deps/SZ/sz/include/pastriGeneral.h
@ -1,205 +0,0 @@
-#ifndef PASTRIGENERAL_H
-#define PASTRIGENERAL_H
-
-
-static inline double abs_FastD(double x){
-  u_UI64I64D u1;
-  u1.d=x;
-  //(*((uint64_t *)(&x)))&=(int64_t)0x7FFFFFFFFFFFFFFF;
-  u1.ui64&=(int64_t)0x7FFFFFFFFFFFFFFF;
-  return u1.d;
-}
-
-static inline int64_t abs_FastI64(int64_t x){
-  return (x^((x&(int64_t)0x8000000000000000)>>63))+((x&(int64_t)0x8000000000000000)!=0);
-}
-/*
-int abs(int x) {
-   int mask = (x >> (sizeof(int) * CHAR_BIT - 1));
-   return (x + mask) ^ mask;
-}
-*/
-
-
-
-
-//Returns the min. bits needed to represent x.
-//Same as: ceil(log2(abs(x))) 
-//Actually to be completely safe, it correspond to: ceil(log2(abs(i)+1))+0.1
-//+0.1 was for fixing rounding errors
-//REMEMBER: To represent the whole range [-x:x], the number of bits required is bitsNeeded(x)+1
-static inline int bitsNeeded_double(double x){
-  u_UI64I64D u1;
-  u1.d=x;
-  return (((u1.ui64<<1)>>53)-1022) & (((x!=0)<<31)>>31);
-}
-
-//Returns the min. bits needed to represent x.
-//Same as: ceil(log2(abs(x))) 
-//NEEDS OPTIMIZATION!
-static inline int bitsNeeded_float(float x){
-  u_UI64I64D u1;
-  u1.d=x; //Casting to Double!
-  return (((u1.ui64<<1)>>53)-1022) & (((x!=0)<<31)>>31);
-}
-
-static inline int bitsNeeded_UI64(uint64_t x){
-  int shift;
-  int res=0;
-  
-  //Get the absolute value of x:
-  //x=(x^((x&(int64_t)0x8000000000000000)>>63))+((x&(int64_t)0x8000000000000000)!=0);
-  //x=abs_FastI64(x);
-  
-  //printf("%d\n",(x&(uint64_t)0xFFFFFFFF00000000)!=0);
-  shift=(((x&(uint64_t)0xFFFFFFFF00000000)!=0)*32);
-  x>>=shift;
-  res+=shift;
-  
-  //printf("%d\n",(x&(uint64_t)0x00000000FFFF0000)!=0);
-  shift=(((x&(uint64_t)0x00000000FFFF0000)!=0)*16);
-  x>>=shift;
-  res+=shift;
-  
-  //printf("%d\n",(x&(uint64_t)0x000000000000FF00)!=0);
-  shift=(((x&(uint64_t)0x000000000000FF00)!=0)*8);
-  x>>=shift;
-  res+=shift;
-  
-  //printf("%d\n",(x&(uint64_t)0x00000000000000F0)!=0);
-  shift=(((x&(uint64_t)0x00000000000000F0)!=0)*4);
-  x>>=shift;
-  res+=shift;
-  
-  //printf("%d\n",(x&(uint64_t)0x000000000000000C)!=0);
-  shift=(((x&(uint64_t)0x000000000000000C)!=0)*2);
-  x>>=shift;
-  res+=shift;
-  
-  //printf("%d\n",(x&(uint64_t)0x0000000000000002)!=0);
-  shift=((x&(uint64_t)0x0000000000000002)!=0);
-  x>>=shift;
-  res+=shift;
-  
-  //printf("%d\n",(x&(uint64_t)0x0000000000000001)!=0);
-  shift=((x&(uint64_t)0x0000000000000001)!=0);
-  x>>=shift;
-  res+=shift;
-  
-  //printf("BITS NEEDED: %d\n",res);
-  return res;
-}
-
-static inline int bitsNeeded_I64(int64_t x){
-  uint64_t ux;
-  ux=abs_FastI64(x);
-  return bitsNeeded_UI64(ux);
-}
-
-//Implementations(They are inline, so they should be in this header file)
-
-static inline int myEndianType(){ //Should work for most cases. May not work at mixed endian systems.
-  uint64_t n=1;
-  if (*(unsigned char*)&n == 1){
-    //cout<<"Little-Endian"<<endl;
-    return 0;  //0 for little endian
-  }
-  else{
-    //cout<<"Big-Endian"<<endl;
-    return 1; //1 for big endian
-  }
-}
-
-static inline void flipBytes_UI64(uint64_t *dataPtr){
-  unsigned char*tempA;
-  char temp8b;
-  tempA=(unsigned char*)dataPtr;
-  temp8b=tempA[7];
-  tempA[7]=tempA[0];
-  tempA[0]=temp8b;
-  temp8b=tempA[6];
-  tempA[6]=tempA[1];
-  tempA[1]=temp8b;
-  temp8b=tempA[5];
-  tempA[5]=tempA[2];
-  tempA[2]=temp8b;
-  temp8b=tempA[4];
-  tempA[4]=tempA[3];
-  tempA[3]=temp8b;
-  return;
-}
-
-//WARNING: readBits works properly only on Little Endian machines! (For Big Endians, some modifications are needed)
-
-static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,char numBits){ // numBits must be in range [0:56]
-    uint64_t mask = ((uint64_t)0x0000000000000001<<numBits)-1;
-    //cout<<"bitPos:"<<(*bitPosPtr)<<"\tbitPos>>3:"<<(*bitPosPtr>>3)<<endl;
-    uint64_t temp64b = *(uint64_t*)(buffer + ( *bitPosPtr >> 3)); 
-    //NOTE: bitPos>>3 is the same as bitPos/8
-    temp64b >>= (*bitPosPtr) & (uint64_t)0x0000000000000007;
-    
-    //cout<<endl;
-    //cout<<"bitpos>>3:"<<(bitPos>>3)<<" bitPos&0x7:"<<(bitPos & 0x00000007)<<" bitPos%8:"<<(bitPos%8)<<endl;
-    //cout<<"Read:"<<(temp64b & mask)<<" temp64b:"<<temp64b<<" Mask:"<<mask<<" numBits:"<<numBits<<endl;
-    
-    (*bitPosPtr) += numBits;
-    return (temp64b & mask);
-}
-
-static inline int64_t readBits_I64(unsigned char* buffer,uint64_t *bitPosPtr,char numBits){ // numBits must be in range [0:56]
-  int64_t val;
-  val=readBits_UI64(buffer,bitPosPtr,numBits);//Read value
-  int64_t shiftAmount=64-numBits;
-  val=(val<<shiftAmount)>>shiftAmount;//Sign correction
-  return val;
-}
-
-//WARNING: readBits_EndianSafe is not tested on Big-Endian machines
-static inline uint64_t readBits_EndianSafe(unsigned char* buffer,uint64_t *bitPosPtr,char numBits){ // numBits must be in range [0:56]
-    uint64_t mask = ((uint64_t)0x0000000000000001<<numBits)-1;
-    uint64_t temp64b = *(uint64_t*)(buffer + ((*bitPosPtr)>>3)); 
-    //NOTE: (*bitPosPtr)>>3 is the same as (*bitPosPtr)/8
-    if(myEndianType())
-      flipBytes_UI64(&temp64b);
-    temp64b >>= (*bitPosPtr) & (uint64_t)0x0000000000000007;
-    (*bitPosPtr) += numBits;
-    return temp64b & mask;
-}
-
-//WARNING: writeBits_Fast works properly only on Little Endian machines! (For Big Endians, some modifications are needed)
-//The buffer should be initialized as 0's for this to work!
-//Also, the range of data is not checked!(If data exceeds numBits, it may be cause problems)
-static inline void writeBits_Fast(unsigned char* buffer,uint64_t *bitPosPtr,char numBits,int64_t data){
-    //if(DEBUG){printf("writeBits_Fast: data:0x%lx %ld\n",data,data);} //DEBUG
-    //if(DEBUG){printf("writeBits_Fast: numBits:0x%lx %ld\n",numBits,numBits);} //DEBUG
-    uint64_t mask = ((uint64_t)0x0000000000000001<<numBits)-1;
-    //if(DEBUG){printf("writeBits_Fast: mask:0x%lx %ld\n",mask,mask);} //DEBUG
-    //if(DEBUG){printf("writeBits_Fast: data&mask:0x%lx %ld\n",((*(uint64_t*)&data)&mask),((*(uint64_t*)&data)&mask));} //DEBUG
-    
-    //if(DEBUG){printf("writeBits_Fast: buffer_O:0x%lx\n",*(uint64_t*)(buffer + ((*bitPosPtr)>>3)));} //DEBUG
-    *(uint64_t*)(buffer + ((*bitPosPtr)>>3)) |= ((*(uint64_t*)&data)&mask) << ((*bitPosPtr) & (uint64_t)0x0000000000000007);
-    //if(DEBUG){printf("writeBits_Fast: buffer_N:0x%lx\n",*(uint64_t*)(buffer + ((*bitPosPtr)>>3)));} //DEBUG
-
-    
-    (*bitPosPtr) += numBits;
-}
-
-//WARNING: writeBits_EndianSafe is not tested on Big-Endian machines
-static inline void writeBits_EndianSafe(unsigned char* buffer,uint64_t *bitPosPtr,char numBits,uint64_t data){
-    uint64_t mask = ((uint64_t)0x0000000000000001<<numBits)-1;
-    data=data&mask;
-    uint64_t temp64b_inBuffer=*(uint64_t*)(buffer + ((*bitPosPtr)>>3));
-    uint64_t temp64b_outBuffer=data << ((*bitPosPtr) & (uint64_t)0x0000000000000007);
-    if(myEndianType()){
-      flipBytes_UI64(&temp64b_inBuffer);
-    }
-    temp64b_outBuffer |= temp64b_inBuffer;
-    if(myEndianType()){
-      flipBytes_UI64(&temp64b_outBuffer);
-    }
-    *(uint64_t*)(buffer + ((*bitPosPtr)>>3))=temp64b_outBuffer;  // "|=" may also work
-    (*bitPosPtr) += numBits;
-}
-
-
-#endif
--- a/deps/SZ/sz/include/rw.h
+++ b/deps/SZ/sz/include/rw.h
@ -1,89 +0,0 @@
-/**
- *  @file io.h
- *  @author Sheng Di
- *  @date April, 2015
- *  @brief Header file for the whole io interface.
- *  (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef _IO_H
-#define _IO_H
-
-#include <stdio.h>
-#include <stdint.h>
-
-#ifdef _WIN32
-#define PATH_SEPARATOR ';'
-#else
-#define PATH_SEPARATOR ':'
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int checkFileExistance(char* filePath);
-
-float** create2DArray_float(size_t m, size_t n);
-void free2DArray_float(float** data, size_t m);
-float*** create3DArray_float(size_t p, size_t m, size_t n);
-void free3DArray_float(float*** data, size_t p, size_t m);
-double** create2DArray_double(size_t m, size_t n);
-void free2DArray_double(double** data, size_t m);
-double*** create3DArray_double(size_t p, size_t m, size_t n);
-void free3DArray_double(double*** data, size_t p, size_t m);
-size_t checkFileSize(char *srcFilePath, int *status);
-
-unsigned char *readByteData(char *srcFilePath, size_t *byteLength, int *status);
-double *readDoubleData(char *srcFilePath, size_t *nbEle, int *status);
-int8_t *readInt8Data(char *srcFilePath, size_t *nbEle, int *status);
-int16_t *readInt16Data(char *srcFilePath, size_t *nbEle, int *status);
-uint16_t *readUInt16Data(char *srcFilePath, size_t *nbEle, int *status);
-int32_t *readInt32Data(char *srcFilePath, size_t *nbEle, int *status);
-uint32_t *readUInt32Data(char *srcFilePath, size_t *nbEle, int *status);
-int64_t *readInt64Data(char *srcFilePath, size_t *nbEle, int *status);
-uint64_t *readUInt64Data(char *srcFilePath, size_t *nbEle, int *status);
-float *readFloatData(char *srcFilePath, size_t *nbEle, int *status);
-unsigned short* readShortData(char *srcFilePath, size_t *dataLength, int *status);
-
-double *readDoubleData_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-int8_t *readInt8Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-int16_t *readInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-uint16_t *readUInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-int32_t *readInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-uint32_t *readUInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-int64_t *readInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-uint64_t *readUInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-float *readFloatData_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
-
-void writeByteData(unsigned char *bytes, size_t byteLength, char *tgtFilePath, int *status);
-void writeDoubleData(double *data, size_t nbEle, char *tgtFilePath, int *status);
-void writeFloatData(float *data, size_t nbEle, char *tgtFilePath, int *status);
-void writeDataSZ(void *data, int dataType, size_t nbEle, char *tgtFilePath, int *status);
-void writeFloatData_inBytes(float *data, size_t nbEle, char* tgtFilePath, int *status);
-void writeDoubleData_inBytes(double *data, size_t nbEle, char* tgtFilePath, int *status);
-void writeShortData_inBytes(short *states, size_t stateLength, char *tgtFilePath, int *status);
-void writeUShortData_inBytes(unsigned short *states, size_t stateLength, char *tgtFilePath, int *status);
-void writeIntData_inBytes(int *states, size_t stateLength, char *tgtFilePath, int *status);
-void writeUIntData_inBytes(unsigned int *states, size_t stateLength, char *tgtFilePath, int *status);
-void writeLongData_inBytes(int64_t *states, size_t stateLength, char *tgtFilePath, int *status);
-void writeULongData_inBytes(uint64_t *states, size_t stateLength, char *tgtFilePath, int *status);
-
-void writeStrings(int nbStr, char *str[], char *tgtFilePath, int *status);
-
-//void convertToPFM_float(float *data, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, int endianType, char *tgtFilePath, int *status);
-
-void checkfilesizec_(char *srcFilePath, int *len, size_t *filesize);
-void readbytefile_(char *srcFilePath, int *len, unsigned char *bytes, size_t *byteLength);
-void readdoublefile_(char *srcFilePath, int *len, double *data, size_t *nbEle);
-void readfloatfile_(char *srcFilePath, int *len, float *data, size_t *nbEle);
-void writebytefile_(unsigned char *bytes, size_t *byteLength, char *tgtFilePath, int *len);
-void writedoublefile_(double *data, size_t *nbEle, char *tgtFilePath, int *len);
-void writefloatfile_(float *data, size_t *nbEle, char *tgtFilePath, int *len);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ----- #ifndef _IO_H  ----- */
--- a/deps/SZ/sz/include/sz.h
+++ b/deps/SZ/sz/include/sz.h
@ -18,7 +18,6 @@
 #include "CompressElement.h"
 #include "DynamicByteArray.h"
 #include "DynamicIntArray.h"
-#include "VarSet.h"
 #include "TightDataPointStorageD.h"
 #include "TightDataPointStorageF.h"
 #include "conf.h"
@ -29,15 +28,8 @@
 #include "sz_double.h"
 #include "szd_float.h"
 #include "szd_double.h"
-#include "sz_opencl.h"
 #include "callZlib.h"
-#include "rw.h"
-#include "pastri.h"
 #include "utility.h"
-#include "CacheTable.h"
-#include "MultiLevelCacheTable.h"
-#include "MultiLevelCacheTableWideInterval.h"
-#include "sz_stats.h"

 #ifdef _WIN32
 #define PATH_SEPARATOR ';'
@ -172,15 +164,6 @@ extern sz_params *confparams_cpr;
 extern sz_params *confparams_dec;
 extern sz_exedata *exe_params;

-//------------------------------------------------
-extern SZ_VarSet* sz_varset;
-extern sz_multisteps *multisteps; //compression based on multiple time steps (time-dimension based compression)
-extern sz_tsc_metadata *sz_tsc;
-
-//for pastri 
-#ifdef PASTRI
-extern pastri_params pastri_par; 
-#endif


 void SZ_Finalize();
--- a/deps/SZ/sz/include/sz_float.h
+++ b/deps/SZ/sz/include/sz_float.h
@ -6,7 +6,6 @@
 *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */
-#include "DynamicFloatArray.h"

 #ifndef _SZ_Float_H
 #define _SZ_Float_H
@ -14,13 +13,11 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize);

 void computeReqLength_float(double realPrecision, short radExpo, int* reqLength, float* medianValue);

 unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision);

-unsigned int optimize_intervals_and_compute_dense_position_float_1D(float *oriData, size_t dataLength, double realPrecision, float * dense_pos);
 unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision);

 TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData, 
@ -29,13 +26,6 @@ size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_
 bool SZ_compress_args_float_NoCkRngeNoGzip_1D( unsigned char* newByteData, float *oriData, 
 size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f);

-size_t SZ_compress_float_1D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data);
-
-size_t SZ_compress_float_1D_MDQ_RA_block_1D_pred(float * block_ori_data, float * mean, float dense_pos, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, DynamicFloatArray * unpredictable_data);
-void SZ_blocked_regression(float * block_ori_data, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, float *params);
-
-unsigned char * SZ_compress_float_1D_MDQ_RA(float *oriData, size_t r1, double realPrecision, size_t * comp_size);
-

 void SZ_compress_args_float_withinRange(unsigned char* newByteData, float *oriData, size_t dataLength, size_t *outSize);

--- a/deps/SZ/sz/include/sz_opencl.h
+++ b/deps/SZ/sz/include/sz_opencl.h
@ -1,68 +0,0 @@
-//make header C++/C inter-operable
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef SZ_OPENCL_H
-#define SZ_OPENCL_H
-
-#include<stddef.h>
-
-	//opaque pointer for opencl state
-  struct sz_opencl_state;
-
-  /**
-   * creates an opencl state for multiple uses of the compressor or
-   * returns an error code.
-   *
-   * \post if return code is SZ_NCES, the state object may only be passed to
-   * sz_opencl_release or sz_opencl_error_* otherwise it may be used in any
-   * sz_opencl_* function.
-   *
-   * \param[out] state the sz opencl state
-   * \return SZ_SUCCESS for success or SZ_NCES on error
-   */
-  int sz_opencl_init(struct sz_opencl_state** state);
-
-	/**
-	 * deinitializes an opencl state
-	 *
-	 * \param[in] state the sz opencl state
-	 * \return SZ_SUCCESS
-	 */
-  int sz_opencl_release(struct sz_opencl_state** state);
-
-	/**
-	 * returns a human readable error message for the last error recieved by state
-	 *
-	 * \param[in] state the sz opencl state
-	 * \return a pointer to a string that describes the error
-	 */
-	const char* sz_opencl_error_msg(struct sz_opencl_state* state);
-
-
-	/**
-	 * returns a numeric code for the last error recieved by state
-	 *
-	 * \param[in] state the sz opencl state
-	 * \return the numeric error code
-	 */
-  int sz_opencl_error_code(struct sz_opencl_state* state);
-
-	/**
-	 * confirms that the sz opencl state is ready to use by performing a vector addition
-	 *
-	 * \param[in] state the sz opencl state
-	 * \return SZ_SUCCESS if the opencl implementation is functioning
-	 */
-	int sz_opencl_check(struct sz_opencl_state*);
-
-  unsigned char* sz_compress_float3d_opencl(float* data, size_t r1, size_t r2, size_t r3, double, size_t* out_size);
-
-
-#endif /* SZ_OPENCL_H */
-
-//make header C++/C inter-operable
-#ifdef __cplusplus
-}
-#endif
--- a/deps/SZ/sz/include/sz_stats.h
+++ b/deps/SZ/sz/include/sz_stats.h
@ -1,58 +0,0 @@
-/**
- *  @file ByteToolkit.h
- *  @author Sheng Di
- *  @date July, 2017
- *  @brief Header file for the ByteToolkit.c.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef _STATS_H
-#define _STATS_H
-
-#include <stdint.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct sz_stats
-{
-	int use_mean;
-	
-	size_t blockSize;
-	
-	float lorenzoPercent;
-	float regressionPercent;
-	size_t lorenzoBlocks;
-	size_t regressionBlocks;
-	size_t totalBlocks;
-	
-	//size_t huffmanTreeHeight;
-	size_t huffmanTreeSize; //before the final zstd
-	size_t huffmanCodingSize; //before the final zstd
-	float huffmanCompressionRatio;
-	int huffmanNodeCount;
-		
-	size_t unpredictCount;
-	float unpredictPercent;
-	
-	float zstdCompressionRatio; //not available yet
-	
-} sz_stats;
-
-extern sz_stats sz_stat;
-
-
-void writeBlockInfo(int use_mean, size_t blockSize, size_t regressionBlocks, size_t totalBlocks);
-void writeHuffmanInfo(size_t huffmanTreeSize, size_t huffmanCodingSize, size_t totalDataSize, int huffmanNocdeCount);
-void writeZstdCompressionRatio(float zstdCompressionRatio);
-void writeUnpredictDataCounts(size_t unpredictCount, size_t totalNumElements);
-void printSZStats();
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ----- #ifndef _STATS_H  ----- */
--- a/deps/SZ/sz/src/ArithmeticCoding.c
+++ b/deps/SZ/sz/src/ArithmeticCoding.c
@ -1,692 +0,0 @@
-/**
- *  @file ArithmeticCoding.c
- *  @author Sheng Di, Mark Thomas Nelson
- *  @date April, 2016
- *  @brief Byte Toolkit
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- *  (C) The MIT License (MIT), this code was modified from Mark's arithmetic coding code: http://www.drdobbs.com/cpp/data-compression-with-arithmetic-encodin/240169251?pgno=1
- */
-#include <sz.h>
-#include <ArithmeticCoding.h>
-
-inline void output_bit_1(unsigned int* buf)
-{
-	(*buf) = (*buf) << 1;
-	(*buf) |= 1;
-}
-
-inline void output_bit_0(unsigned int* buf)
-{
-	(*buf) = (*buf) << 1;
-	//(*byte) |= 0; //actually doesn't have to set the bit to 0
-}
-
-//TODO: problematic
-inline unsigned int output_bit_1_plus_pending(int pending_bits)
-{
-	unsigned int buf = 0, pbits = pending_bits;
-	output_bit_1(&buf);
-	while(pbits--)
-		output_bit_0(&buf);
-	buf = buf << (32-(pending_bits+1)); //alignment to the left leading bit, which would be easier for the final output
-	return buf;
-}
-
-inline unsigned int output_bit_0_plus_pending(int pending_bits)
-{
-	unsigned int buf = 0, pbits = pending_bits;
-	//output_bit_0(&buf);
-	while(pbits--)
-		output_bit_1(&buf);
-	buf = buf << (32-(pending_bits+1)); //alignment to the left leading bit
-	return buf;
-}
-
-/**
- * Create AriCoder for the following arithmetic encoding operation. 
- * In this function, it will compute the real frequency of the integer codes.
- * @param int numOfStates (input): numOfStates is the real # states calculated to the optimization_num_of_interval code
- * @param int *s (input): the integer code array (i.e., type_array generated by prediction+quantization)
- * @param size_t length: the number of integer codes in the type_array
- * 
- * */
-AriCoder *createAriCoder(int numOfStates, int *s, size_t length)
-{
-	AriCoder *ariCoder = (AriCoder*)malloc(sizeof(AriCoder));
-	memset(ariCoder, 0, sizeof(AriCoder));
-	ariCoder->numOfRealStates = numOfStates;
-	ari_init(ariCoder, s, length);
-    return ariCoder;
-}
-
-void freeAriCoder(AriCoder *ariCoder)
-{
-	free(ariCoder->cumulative_frequency);
-	free(ariCoder);
-}
-
-void ari_init(AriCoder *ariCoder, int *s, size_t length)
-{
-	size_t i; //# states is in the range of integer.
-	int index = 0;
-	size_t *freq = (size_t *)malloc(ariCoder->numOfRealStates*sizeof(size_t));
-	memset(freq, 0, ariCoder->numOfRealStates*sizeof(size_t));
-	for(i = 0;i < length;i++) 
-	{
-		index = s[i];
-		freq[index]++;
-	}
- 
-	int counter = 0;
-	size_t _sum = 0, sum = 0, freqDiv = 0;
-	ariCoder->cumulative_frequency = (Prob *)malloc(ariCoder->numOfRealStates*sizeof(Prob));
-	
-	memset(ariCoder->cumulative_frequency, 0, ariCoder->numOfRealStates*sizeof(Prob));
-	
-	if(length <= MAX_INTERVALS)
-	{
-		for (index = 0; index < ariCoder->numOfRealStates; index++)
-		{
-			if (freq[index]) 
-			{
-				sum += freq[index];
-				(ariCoder->cumulative_frequency[index]).low = _sum;
-				(ariCoder->cumulative_frequency[index]).high = sum;
-				(ariCoder->cumulative_frequency[index]).state = index;
-				_sum = sum;
-				counter++;
-			}
-		}
-		ariCoder->numOfValidStates = counter;
-		ariCoder->total_frequency = sum;		
-	}
-	else
-	{
-		int intvSize = length%MAX_INTERVALS==0?length/MAX_INTERVALS:length/MAX_INTERVALS+1;
-		for (index = 0; index < ariCoder->numOfRealStates; index++)
-		{
-			if (freq[index]) 
-			{
-				freqDiv = freq[index]/intvSize; //control the sum of frequency to be no greater than MAX_INTERVALS
-				if(freqDiv==0)
-					freqDiv = 1;
-				sum += freqDiv;
-				(ariCoder->cumulative_frequency[index]).low = _sum;
-				(ariCoder->cumulative_frequency[index]).high = sum;
-				(ariCoder->cumulative_frequency[index]).state = index;
-				_sum = sum;
-				counter++;
-			}
-		}
-		ariCoder->numOfValidStates = counter;
-		ariCoder->total_frequency = sum;			
-	}
-
-	free(freq);
-}
-
-/**
- * Convert AriCoder to bytes for storage
- * @param AriCoder* ariCoder (input)
- * @param unsigned char** out (output)
- * 
- * @return outSize
- * */
-unsigned int pad_ariCoder(AriCoder* ariCoder, unsigned char** out)
-{
-	int numOfRealStates = ariCoder->numOfRealStates;
-	int numOfValidStates = ariCoder->numOfValidStates;
-	uint64_t total_frequency = ariCoder->total_frequency;
-	Prob* cumulative_frequency = ariCoder->cumulative_frequency;
-	
-	unsigned int outSize = 0;
-	*out = (unsigned char*)malloc(2*sizeof(int)+sizeof(uint64_t)+sizeof(Prob)*numOfRealStates);
-	
-	unsigned char* p = *out;
-	intToBytes_bigEndian(p, numOfRealStates);
-	p+=sizeof(int);
-	intToBytes_bigEndian(p, numOfValidStates);
-	p+=sizeof(int);
-	int64ToBytes_bigEndian(p, total_frequency);
-	p+=sizeof(uint64_t);
-	size_t i = 0;
-	if(total_frequency <= 65536)
-	{
-		uint16_t low, high;
-		if(numOfRealStates<=256)
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint16_t)(cumulative_frequency[i].high);				
-				if(high!=0) //if this state cell is not null
-				{
-					low = (uint16_t)(cumulative_frequency[i].low);
-					int16ToBytes_bigEndian(p,low);
-					p+=sizeof(uint16_t);
-					int16ToBytes_bigEndian(p,high);
-					p+=sizeof(uint16_t);
-					*(p++)=(unsigned char)cumulative_frequency[i].state;
-					//if(((unsigned char)cumulative_frequency[i].state)==129)
-					//	printf("break i=%zu\n", i);
-				}
-			}
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*5; //2*sizeof(uint16_t)+1
-		}
-		else if(numOfRealStates<=65536)
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint16_t)(cumulative_frequency[i].high);				
-				if(high!=0)
-				{
-					low = (uint16_t)(cumulative_frequency[i].low);
-					int16ToBytes_bigEndian(p,low);
-					p+=sizeof(uint16_t);
-					int16ToBytes_bigEndian(p,high);
-					p+=sizeof(uint16_t);
-					uint16_t state = (uint16_t)cumulative_frequency[i].state;
-					int16ToBytes_bigEndian(p, state);
-					p+=sizeof(uint16_t);
-				}
-			}	
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*6;
-		}
-		else
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint16_t)(cumulative_frequency[i].high);				
-				if(high!=0)
-				{
-					low = (uint16_t)(cumulative_frequency[i].low);
-					int16ToBytes_bigEndian(p,low);
-					p+=sizeof(uint16_t);
-					int16ToBytes_bigEndian(p,high);
-					p+=sizeof(uint16_t);
-					int32ToBytes_bigEndian(p, cumulative_frequency[i].state);
-					p+=sizeof(uint32_t);
-				}
-			}
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*8;
-		}
-	}
-	else if(total_frequency <=4294967296)
-	{
-		uint32_t low, high;
-		if(numOfRealStates<=256)
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint32_t)(cumulative_frequency[i].high);				
-				if(high!=0)
-				{
-					low = (uint32_t)(cumulative_frequency[i].low);
-					int32ToBytes_bigEndian(p,low);
-					p+=sizeof(uint32_t);
-					int32ToBytes_bigEndian(p,high);
-					p+=sizeof(uint32_t);
-					*(p++)=(unsigned char)cumulative_frequency[i].state;					
-				}
-			}
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*9;
-		}
-		else if(numOfRealStates<=65536)
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint32_t)(cumulative_frequency[i].high);
-				if(high!=0)
-				{
-					low = (uint32_t)(cumulative_frequency[i].low);
-					int32ToBytes_bigEndian(p,low);
-					p+=sizeof(uint32_t);
-					int32ToBytes_bigEndian(p,high);
-					p+=sizeof(uint32_t);
-					uint16_t state = (uint16_t)cumulative_frequency[i].state;
-					int16ToBytes_bigEndian(p, state);
-					p+=sizeof(uint16_t);
-					
-				}
-			}
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*10;
-		}
-		else
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint32_t)(cumulative_frequency[i].high);
-				if(high!=0)
-				{
-					low = (uint32_t)(cumulative_frequency[i].low);
-					int32ToBytes_bigEndian(p,low);
-					p+=sizeof(uint32_t);
-					int32ToBytes_bigEndian(p,high);
-					p+=sizeof(uint32_t);
-					int32ToBytes_bigEndian(p, cumulative_frequency[i].state);
-					p+=sizeof(uint32_t);
-				}
-			}
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*12;
-		}
-	}
-	else
-	{
-		uint64_t low, high;
-		if(numOfRealStates<=256)
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint64_t)(cumulative_frequency[i].high);
-				if(high!=0)
-				{
-					low = (uint64_t)(cumulative_frequency[i].low);
-					int64ToBytes_bigEndian(p,low);
-					p+=sizeof(uint64_t);
-					int64ToBytes_bigEndian(p,high);
-					p+=sizeof(uint64_t);
-					*(p++)=(unsigned char)cumulative_frequency[i].state;
-				}
-			}
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*17;
-		}
-		else if(numOfRealStates<=65536)
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint64_t)(cumulative_frequency[i].high);
-				if(high!=0)
-				{
-					low = (uint64_t)(cumulative_frequency[i].low);
-					int64ToBytes_bigEndian(p,low);
-					p+=sizeof(uint64_t);
-					int64ToBytes_bigEndian(p,high);
-					p+=sizeof(uint64_t);
-					uint16_t state = (uint16_t)cumulative_frequency[i].state;
-					int16ToBytes_bigEndian(p, state);
-					p+=sizeof(uint16_t);					
-				}
-			}
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*18;
-		}
-		else
-		{
-			for(i=0;i<numOfRealStates;i++)
-			{
-				high = (uint64_t)(cumulative_frequency[i].high);
-				if(high!=0)
-				{
-					low = (uint64_t)(cumulative_frequency[i].low);
-					int64ToBytes_bigEndian(p,low);
-					p+=sizeof(uint64_t);
-					int64ToBytes_bigEndian(p,high);
-					p+=sizeof(uint64_t);
-					int32ToBytes_bigEndian(p, cumulative_frequency[i].state);
-					p+=sizeof(uint32_t);					
-				}
-			}
-			outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*20;
-		}
-	}
-	return outSize;
-}
-
-/**
- * Reconstruct AriCoder based on the bytes loaded from compressed data
- * @param AriCoder** ariCoder (ourput)
- * @param unsigned char* bytes (input)
- * 
- * @return offset
- * */
-int unpad_ariCoder(AriCoder** ariCoder, unsigned char* bytes)
-{
-	int offset = 0;
-	
-	*ariCoder = (AriCoder*)malloc(sizeof(AriCoder));
-	memset(*ariCoder, 0, sizeof(AriCoder));
-	
-	unsigned char *p = bytes;
-	int numOfRealStates = (*ariCoder)->numOfRealStates = bytesToInt_bigEndian(p);
-	p += sizeof(int);
-	int numOfValidStates = (*ariCoder)->numOfValidStates = bytesToInt_bigEndian(p);
-	p += sizeof(int);
-	size_t total_frequency = (*ariCoder)->total_frequency = bytesToInt64_bigEndian(p);
-	p += sizeof(uint64_t);
-	
-	(*ariCoder)->cumulative_frequency = (Prob*)malloc((*ariCoder)->numOfRealStates*sizeof(Prob));
-	memset((*ariCoder)->cumulative_frequency, 0, (*ariCoder)->numOfRealStates*sizeof(Prob));
-	size_t i = 0;
-	unsigned char *low_p = NULL, *high_p = NULL, *state_p = NULL;
-	int state = 0;
-	if(total_frequency <= 65536)
-	{
-		if(numOfRealStates<=256)
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint16_t);
-				state_p = high_p+sizeof(uint16_t);
-				state = *state_p;
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + 1;
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*5; //2*sizeof(uint16_t)+1
-		}
-		else if(numOfRealStates<=65536)
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint16_t);
-				state_p = high_p+sizeof(uint16_t);
-				state = bytesToUInt16_bigEndian(state_p);
-				
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + sizeof(uint16_t);
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*6;
-		}
-		else
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint16_t);
-				state_p = high_p+sizeof(uint16_t);
-				state = bytesToUInt32_bigEndian(state_p);
-				
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + sizeof(uint32_t);
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*8;
-		}
-	}
-	else if(total_frequency <=4294967296)
-	{
-		if(numOfRealStates<=256)
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint32_t);
-				state_p = high_p+sizeof(uint32_t);
-				state = *state_p;
-				
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + 1;
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*9;
-		}
-		else if(numOfRealStates<=65536)
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint32_t);
-				state_p = high_p+sizeof(uint32_t);
-				state = bytesToUInt16_bigEndian(state_p);
-				
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + sizeof(uint16_t);
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*10;
-		}
-		else
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint32_t);
-				state_p = high_p+sizeof(uint32_t);
-				state = bytesToUInt32_bigEndian(state_p);
-				
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + sizeof(uint32_t);
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*12;
-		}
-	}
-	else
-	{
-		if(numOfRealStates<=256)
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint64_t);
-				state_p = high_p+sizeof(uint64_t);
-				state = *state_p;
-				
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt64_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + 1;
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*17;
-		}
-		else if(numOfRealStates<=65536)
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint64_t);
-				state_p = high_p+sizeof(uint64_t);
-				state = bytesToUInt16_bigEndian(state_p);
-				
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt64_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + sizeof(uint16_t);
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*18;
-		}
-		else
-		{
-			for(i=0;i<numOfValidStates;i++)
-			{
-				low_p = p;
-				high_p = low_p+sizeof(uint64_t);
-				state_p = high_p+sizeof(uint64_t);
-				state = bytesToUInt32_bigEndian(state_p);
-				
-				(*ariCoder)->cumulative_frequency[state].low = bytesToUInt64_bigEndian(low_p);
-				(*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p);
-				(*ariCoder)->cumulative_frequency[state].state = state;
-				
-				p = state_p + sizeof(uint32_t);
-			}
-			offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*20;
-		}
-	}
-	return offset;
-}
-
-/**
- * Arithmetic Encoding
- * @param AriCoder *ariCoder (input)
- * @param int *s (input)
- * @param size_t length (input)
- * @param unsigned char *out (output)
- * @param size_t *outSize (output)
- * 
- * */
-void ari_encode(AriCoder *ariCoder, int *s, size_t length, unsigned char *out, size_t *outSize)
-{
-	int pending_bits = 0;
-	size_t low = 0;
-	size_t high = MAX_CODE;
-	size_t i = 0, range = 0;
-	size_t count = ariCoder->total_frequency;
-	int c = 0, lackBits = 0;
-	*outSize = 0;
-	
-	unsigned char *outp = out;
-	
-	Prob *cumulative_frequency = ariCoder->cumulative_frequency;
-	unsigned int buf = 0;
-	
-	for (i=0;i<length;i++)
-	{
-		c = s[i];
-		Prob p = cumulative_frequency[c];
-		range = high - low + 1;
-		high = low + (range * p.high / count) - 1;
-		low = low + (range * p.low / count);
-		for ( ; ; ) 
-		{
-			if ( high < ONE_HALF )
-			{
-				buf = output_bit_0_plus_pending(pending_bits);
-				put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize);
-				pending_bits = 0;
-			}
-			else if ( low >= ONE_HALF )
-			{
-				buf = output_bit_1_plus_pending(pending_bits);
-				put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize);
-				pending_bits = 0;			
-			}
-			else if ( low >= ONE_FOURTH && high < THREE_FOURTHS ) 
-			{
-				pending_bits++;
-				low -= ONE_FOURTH;
-				high -= ONE_FOURTH;
-			} else
-				break;
-			high <<= 1;
-			high++;
-			low <<= 1;
-			high &= MAX_CODE;
-			low &= MAX_CODE;
-		}
-	}
-	pending_bits++;
-	if(low < ONE_FOURTH)
-	{
-		buf = output_bit_0_plus_pending(pending_bits);
-		put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize);
-	}
-	else
-	{
-		buf = output_bit_1_plus_pending(pending_bits);
-		put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize);
-	}	
-}
-
-/**
- * Get the integer code based on Arithmetic Coding Value 
- * @param AriCoder *ariCoder (input)
- * @param size_t scaled_value (input)
- * 
- * @return Prob* (output)
- * 
- * */
-Prob* getCode(AriCoder *ariCoder, size_t scaled_value)
-{
-	int numOfRealStates = ariCoder->numOfRealStates;
-	int i = 0;
-	Prob *p = ariCoder->cumulative_frequency;
-	for(i=0;i<numOfRealStates;i++,p++)
-	{
-		if(scaled_value < p->high)
-			break;
-	}
-	return p;
-}
-
-/**
- * Get one bit from the input stream of bytes
- * @param unsigned char* p (input): the current location to be read (byte) of the byte stream
- * @param int offset (input): the offset of the specified byte in the byte stream
- * 
- * @return unsigned char (output) : 1 or 0
- * */
-inline unsigned char get_bit(unsigned char* p, int offset)
-{
-	return ((*p) >> (7-offset)) & 0x01;
-}
-
-/**
- * Arithmetic Decoding algorithm 
- * @param AriCoder *ariCoder (input): the encoder with the constructed frequency information
- * @param unsigned char *s (input): the compressed stream of bytes
- * @param size_t s_len (input): the number of bytes in the 'unsigned char *s'
- * @param size_t targetLength (input): the target number of elements in the type array
- * @param int *out (output) : the result (type array decompressed from the stream 's')
- * 
- * */
-void ari_decode(AriCoder *ariCoder, unsigned char *s, size_t s_len, size_t targetLength, int *out)
-{
-	size_t high = MAX_CODE;
-	size_t low = 0, i = 0;
-	size_t range = 0, scaled_value = 0;
-	size_t total_frequency = ariCoder->total_frequency;
-	unsigned char *sp = s+5;
-	unsigned int offset = 4;
-	size_t value = (bytesToUInt64_bigEndian(s) >> 20); //alignment with the MAX_CODE
-	size_t s_counter = sizeof(int);
-	
-	for(i=0;i<targetLength;i++)
-	{
-		range = high -  low + 1;
-		scaled_value = ((value - low + 1) * ariCoder->total_frequency  - 1 ) / range;
-		Prob *p = getCode(ariCoder, scaled_value);
-		out[i] = p->state;  //output the state to the 'out' array
-		high = low + (range*p->high)/total_frequency -1;
-		low = low + (range*p->low)/total_frequency;
-		
-		for( ; ; )
-		{
-			if (high < ONE_HALF) {
-			  //do nothing, bit is a zero
-			} else if ( low >= ONE_HALF ) 
-			{
-			  value -= ONE_HALF;  //subtract one half from all three code values
-			  low -= ONE_HALF;
-			  high -= ONE_HALF;
-			} else if ( low >= ONE_FOURTH && high < THREE_FOURTHS ) 
-			{
-			  value -= ONE_FOURTH;
-			  low -= ONE_FOURTH;
-			  high -= ONE_FOURTH;
-			} else
-			  break;
-			low <<= 1;
-			high <<= 1;
-			high++;
-			value <<= 1;
-			//load one bit from the input byte stream	
-			if(s_counter < s_len)
-			{	
-				value += get_bit(sp, offset++);
-				if(offset==8)
-				{
-					sp++;
-					s_counter++;
-					offset = 0;
-				}
-			}
-		}
-	}
-}
--- a/deps/SZ/sz/src/CacheTable.c
+++ b/deps/SZ/sz/src/CacheTable.c
@ -1,100 +0,0 @@
-/**
- *  @file CacheTable.c
- *  @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
- *  @date Jan, 2019
- *  @brief Cache Table
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include <stdlib.h>
-#include "CacheTable.h"
-
-double* g_CacheTable;
-uint32_t * g_InverseTable;
-uint32_t baseIndex;
-uint32_t topIndex;
-int bits;
-
-inline int doubleGetExpo(double d){
-    long* ptr = (long*)&d;
-    *ptr = ((*ptr) >> 52) - 1023;
-    return *ptr;
-}
-
-int CacheTableGetRequiredBits(double precision, int quantization_intervals){
-    double min_distance = pow((1+precision), -(quantization_intervals>>1)) * precision;
-    return -(doubleGetExpo(min_distance));
-}
-
-inline uint32_t CacheTableGetIndex(float value, int bits){
-    uint32_t* ptr = (uint32_t*)&value;
-    int shift = 32 - 9 - bits;
-    if(shift>0){
-        return (*ptr) >> shift;
-    }else{
-        return 0;
-    }
-}
-
-inline uint64_t CacheTableGetIndexDouble(double value, int bits){
-    uint64_t* ptr = (uint64_t*)&value;
-    int shift = 64 - 12 - bits;
-    if(shift>0){
-        return (*ptr) >> shift;
-    }else{
-        return 0;
-    }
-}
-
-inline int CacheTableIsInBoundary(uint32_t index){
-    if(index <= topIndex && index > baseIndex){
-        return 1;
-    }else{
-        return 0;
-    }
-}
-
-void CacheTableBuild(double * table, int count, double smallest, double largest, double precision, int quantization_intervals){
-    bits = CacheTableGetRequiredBits(precision, quantization_intervals);
-    baseIndex = CacheTableGetIndex((float)smallest, bits)+1;
-    topIndex = CacheTableGetIndex((float)largest, bits);
-    uint32_t range = topIndex - baseIndex + 1;
-    g_InverseTable = (uint32_t *)malloc(sizeof(uint32_t) * range);
-
-    /*
-    uint32_t fillInPos = 0;
-    for(int i=0; i<count; i++){
-        if(i == 0){
-            continue;
-        }
-        uint32_t index = CacheTableGetIndex((float)table[i], bits) - baseIndex;
-        g_InverseTable[index] = i;
-        if(index > fillInPos){
-            for(int j=fillInPos; j<index; j++){
-                g_InverseTable[j] = g_InverseTable[index];
-            }
-        }
-        fillInPos = index + 1;
-    }
-     */
-    for(int i=count-1; i>0; i--){
-        uint32_t upperIndex = CacheTableGetIndex((float)table[i]*(1+precision), bits);
-        uint32_t lowerIndex = CacheTableGetIndex((float)table[i]/(1+precision), bits);
-        for(uint32_t j = lowerIndex; j<=upperIndex; j++){
-            if(j<baseIndex || j >topIndex){
-                continue;
-            }
-            g_InverseTable[j-baseIndex] = i;
-        }
-    }
-
-}
-
-inline uint32_t CacheTableFind(uint32_t index){
-    return g_InverseTable[index-baseIndex];
-}
-
-void CacheTableFree(){
-    free(g_InverseTable);
-}
--- a/deps/SZ/sz/src/DynamicDoubleArray.c
+++ b/deps/SZ/sz/src/DynamicDoubleArray.c
@ -1,57 +0,0 @@
-/**
- *  @file DynamicFloatArray.c
- *  @author Sheng Di
- *  @date May, 2016
- *  @brief Dynamic Float Array
- *  (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include <stdlib.h> 
-#include <stdio.h>
-#include <string.h>
-#include "DynamicDoubleArray.h"
-
-void new_DDA(DynamicDoubleArray **dda, size_t cap) {
-		*dda = (DynamicDoubleArray *)malloc(sizeof(DynamicDoubleArray));
-        (*dda)->size = 0;
-        (*dda)->capacity = cap;
-        (*dda)->array = (double*)malloc(sizeof(double)*cap);
-    }
-
-void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data)
-{
-	size_t size = dba->size;
-	if(size>0)
-		*data = (double*)malloc(size * sizeof(double));
-	else
-		*data = NULL;
-	memcpy(*data, dba->array, size*sizeof(double));	
-}
-
-void free_DDA(DynamicDoubleArray *dda)
-{
-	free(dda->array);
-	free(dda);
-}
-
-double getDDA_Data(DynamicDoubleArray *dda, size_t pos)
-{
-	if(pos>=dda->size)
-	{
-		printf("Error: wrong position of DIA.\n");
-		exit(0);
-	}
-	return dda->array[pos];
-}
-
-void addDDA_Data(DynamicDoubleArray *dda, double value)
-{
-	if(dda->size==dda->capacity)
-	{
-		dda->capacity *= 2;
-		dda->array = (double *)realloc(dda->array, dda->capacity*sizeof(double));
-	}
-	dda->array[dda->size] = value;
-	dda->size ++;
-}
--- a/deps/SZ/sz/src/DynamicFloatArray.c
+++ b/deps/SZ/sz/src/DynamicFloatArray.c
@ -1,57 +0,0 @@
-/**
- *  @file DynamicFloatArray.c
- *  @author Sheng Di
- *  @date May, 2016
- *  @brief Dynamic Float Array
- *  (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include <stdlib.h> 
-#include <stdio.h>
-#include <string.h>
-#include "DynamicFloatArray.h"
-
-void new_DFA(DynamicFloatArray **dfa, size_t cap) {
-		*dfa = (DynamicFloatArray *)malloc(sizeof(DynamicFloatArray));
-        (*dfa)->size = 0;
-        (*dfa)->capacity = cap;
-        (*dfa)->array = (float*)malloc(sizeof(float)*cap);
-    }
-
-void convertDFAtoFloats(DynamicFloatArray *dfa, float **data)
-{
-	size_t size = dfa->size;
-	if(size>0)
-		*data = (float*)malloc(size * sizeof(float));
-	else
-		*data = NULL;
-	memcpy(*data, dfa->array, size*sizeof(float));	
-}
-
-void free_DFA(DynamicFloatArray *dfa)
-{
-	free(dfa->array);
-	free(dfa);
-}
-
-float getDFA_Data(DynamicFloatArray *dfa, size_t pos)
-{
-	if(pos>=dfa->size)
-	{
-		printf("Error: wrong position of DIA.\n");
-		exit(0);
-	}
-	return dfa->array[pos];
-}
-
-void addDFA_Data(DynamicFloatArray *dfa, float value)
-{
-	if(dfa->size==dfa->capacity)
-	{
-		dfa->capacity *= 2;
-		dfa->array = (float *)realloc(dfa->array, dfa->capacity*sizeof(float));
-	}
-	dfa->array[dfa->size] = value;
-	dfa->size++;
-}
--- a/deps/SZ/sz/src/MultiLevelCacheTable.c
+++ b/deps/SZ/sz/src/MultiLevelCacheTable.c
@ -1,193 +0,0 @@
-/**
- *  @file MultiLevelCacheTable.c
- *  @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
- *  @date Jan, 2019
- *  @brief Header file.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include <stdint.h>
-#include <memory.h>
-#include <stdlib.h>
-#include "stdio.h"
-#include "MultiLevelCacheTable.h"
-
-uint8_t MLCT_GetExpoIndex(float value){
-    uint32_t* ptr = (uint32_t*)&value;
-    return (*ptr) >> 23;
-}
-
-uint8_t MLCT_GetRequiredBits(float precision){
-    int32_t* ptr = (int32_t*)&precision;
-    return -(((*ptr) >> 23) - 127);
-}
-
-
-uint32_t MLCT_GetMantiIndex(float value, int bits){
-    uint32_t* ptr = (uint32_t*)&value;
-    (*ptr) = (*ptr) << 9 >> 9;
-    int shift = 32 - 9 - bits;
-    if(shift > 0){
-        return (*ptr) >> shift;
-    }else{
-        return (*ptr);
-    }
-}
-
-float MLTC_RebuildFloat(uint8_t expo, uint32_t manti, int bits){
-    float result = 0;
-    uint32_t *ptr = (uint32_t*)&result;
-    *ptr = expo;
-    (*ptr) = (*ptr) << 23;
-    (*ptr) |= (manti << (23-bits));
-    return result;
-}
-
-void MultiLevelCacheTableBuild(struct TopLevelTable* topTable, float* precisionTable, int count, float precision){
-    uint8_t bits = MLCT_GetRequiredBits(precision);
-    topTable->bits = bits;
-    topTable->bottomBoundary = precisionTable[1]/(1+precision);
-    topTable->topBoundary = precisionTable[count-1]/(1-precision);
-    topTable->baseIndex = MLCT_GetExpoIndex(topTable->bottomBoundary);
-    topTable->topIndex = MLCT_GetExpoIndex(topTable->topBoundary);
-    int subTableCount = topTable->topIndex - topTable->baseIndex + 1;
-    topTable->subTables = (struct SubLevelTable*)malloc(sizeof(struct SubLevelTable) * subTableCount);
-    memset(topTable->subTables, 0, sizeof(struct SubLevelTable) * subTableCount);
-
-    //uint32_t expoBoundary[subTableCount];
-    uint8_t lastExpo = 0xff;
-    uint8_t lastIndex = 0;
-    for(int i=0; i<count; i++){
-        uint8_t expo = MLCT_GetExpoIndex(precisionTable[i]);
-        if(expo != lastExpo){
-            //expoBoundary[lastIndex] = i;
-            lastExpo = expo;
-            lastIndex++;
-        }
-    }
-
-    for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--){
-        struct SubLevelTable* processingSubTable = &topTable->subTables[i];
-        if(i == topTable->topIndex - topTable->baseIndex &&
-            MLCT_GetExpoIndex(topTable->topBoundary) == MLCT_GetExpoIndex(precisionTable[count-1])){
-            processingSubTable->topIndex = MLCT_GetMantiIndex(topTable->topBoundary, bits) - 1;
-        }else{
-            uint32_t maxIndex = 0;
-            for(int j=0; j<bits; j++){
-                maxIndex += 1 << j;
-            }
-            processingSubTable->topIndex = maxIndex;
-        }
-        if(i == 0 && MLCT_GetExpoIndex(topTable->bottomBoundary) == MLCT_GetExpoIndex(precisionTable[0])){
-            processingSubTable->baseIndex = MLCT_GetMantiIndex(topTable->bottomBoundary, bits)+1;
-        }else{
-            processingSubTable->baseIndex = 0;
-        }
-
-        int subTableLength = processingSubTable->topIndex - processingSubTable-> baseIndex+ 1;
-        processingSubTable->table = (uint32_t*)malloc(sizeof(uint32_t) * subTableLength);
-        memset(processingSubTable->table, 0, sizeof(uint32_t) * subTableLength);
-        processingSubTable->expoIndex = topTable->baseIndex + i;
-    }
-
-    uint32_t index = 1;
-    for(uint8_t i = 0; i<=topTable->topIndex-topTable->baseIndex; i++){
-        struct SubLevelTable* processingSubTable = &topTable->subTables[i];
-        uint8_t expoIndex = i+topTable->baseIndex;
-        for(uint32_t j = 0; j<=processingSubTable->topIndex - processingSubTable->baseIndex; j++){
-            uint32_t mantiIndex = j+processingSubTable->baseIndex;
-            float sample = MLTC_RebuildFloat(expoIndex, mantiIndex, topTable->bits);
-            float bottomBoundary = precisionTable[index] / (1+precision);
-            float topBoundary = precisionTable[index] / (1-precision);
-            if(sample < topBoundary && sample > bottomBoundary){
-                processingSubTable->table[j] = index;
-            }else{
-                //float newPrecision = precisionTable[index];
-                index++;
-                processingSubTable->table[j] = index;
-                if(j)
-                    processingSubTable->table[j-1] = index;
-                else{
-                    struct SubLevelTable* pastSubTable = &topTable->subTables[i-1];
-                    pastSubTable->table[pastSubTable->topIndex - pastSubTable->baseIndex] = index;
-                }
-            }
-        }
-        if(i == topTable->topIndex - topTable->baseIndex){
-            uint32_t j = processingSubTable->topIndex - processingSubTable->baseIndex + 1;
-            uint32_t mantiIndex = j + processingSubTable->baseIndex;
-            float sample = MLTC_RebuildFloat(expoIndex, mantiIndex, topTable->bits);
-            float bottomBoundary = precisionTable[index] / (1+precision);
-            float topBoundary = precisionTable[index] / (1-precision);
-            if(sample > topBoundary || sample < bottomBoundary){
-                index++;
-                processingSubTable->table[j-1] = index;
-            }
-        }
-    }
-
-    /*
-    long lastIndexInExpoRange = count-1;
-    bool trigger = false;
-    float preRange = 0.0;
-    uint32_t preIndex = 0;
-    for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--){
-        struct SubLevelTable* processingSubTable = &topTable->subTables[i];
-        if(trigger){
-            uint32_t bound = MLCT_GetMantiIndex(preRange, bits);
-            for(int j = processingSubTable->topIndex; j>=processingSubTable->baseIndex; j--){
-                if(j >= bound){
-                    processingSubTable->table[j-processingSubTable->baseIndex] = preIndex;
-                }else{
-                    break;
-                }
-            }
-            trigger = false;
-        }
-        long firstIndexInExpoRange = expoBoundary[i];
-        uint8_t expoInRange = MLCT_GetExpoIndex(precisionTable[firstIndexInExpoRange]);
-        for(int j=lastIndexInExpoRange; j>=firstIndexInExpoRange; j--){
-            float test = precisionTable[j];
-            uint32_t rangeTop = MLCT_GetMantiIndex(precisionTable[j]*(1+precision), bits) - 1;
-            uint32_t rangeBottom;
-            if(j == firstIndexInExpoRange){
-                preRange = precisionTable[j]/(1+precision);
-                if(expoInRange != MLCT_GetExpoIndex(preRange)){
-                    trigger = true;
-                    preIndex = firstIndexInExpoRange;
-                    rangeBottom = 0;
-                }else{
-                    rangeBottom= MLCT_GetMantiIndex(precisionTable[j]/(1+precision), bits) + 1;
-                }
-            }else{
-                rangeBottom= MLCT_GetMantiIndex(precisionTable[j]/(1+precision), bits) + 1;
-            }
-            for(int k = rangeBottom; k<=rangeTop; k++){
-                if( k <= processingSubTable->topIndex && k >= processingSubTable->baseIndex)
-                    processingSubTable->table[k - processingSubTable->baseIndex] = j;
-            }
-        }
-        lastIndexInExpoRange = firstIndexInExpoRange-1;
-    }
-     */
-}
-
-uint32_t MultiLevelCacheTableGetIndex(float value, struct TopLevelTable* topLevelTable){
-    uint8_t expoIndex = MLCT_GetExpoIndex(value);
-    if(expoIndex <= topLevelTable->topIndex && expoIndex >= topLevelTable->baseIndex){
-        struct SubLevelTable* subLevelTable = &topLevelTable->subTables[expoIndex-topLevelTable->baseIndex];
-        uint32_t mantiIndex = MLCT_GetMantiIndex(value, topLevelTable->bits);
-        MLTC_RebuildFloat(expoIndex, mantiIndex, topLevelTable->bits);
-        if(mantiIndex >= subLevelTable->baseIndex && mantiIndex <= subLevelTable->topIndex)
-            return subLevelTable->table[mantiIndex - subLevelTable->baseIndex];
-    }
-    return 0;
-}
-
-void MultiLevelCacheTableFree(struct TopLevelTable* table){
-    for(int i=0; i<table->topIndex - table->baseIndex + 1; i++){
-        free(table->subTables[i].table);
-    }
-    free(table->subTables);
-}
--- a/deps/SZ/sz/src/MultiLevelCacheTableWideInterval.c
+++ b/deps/SZ/sz/src/MultiLevelCacheTableWideInterval.c
@ -1,125 +0,0 @@
-/**
- *  @file MultiLevelCacheTableWideInterval.h
- *  @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
- *  @date Jan, 2019
- *  @brief Header file.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include <stdbool.h>
-#include "MultiLevelCacheTableWideInterval.h"
-
-void freeTopLevelTableWideInterval(struct TopLevelTableWideInterval* topTable)
-{
-	for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--)
-	{
-		struct SubLevelTableWideInterval* processingSubTable = &topTable->subTables[i];
-		free(processingSubTable->table);
-	}
-	free(topTable->subTables);
-}
-
-uint16_t MLCTWI_GetExpoIndex(double value){
-    uint64_t* ptr = (uint64_t*)&value;
-    return (*ptr) >> 52;
-}
-
-uint16_t MLCTWI_GetRequiredBits(double precision){
-    uint64_t* ptr = (uint64_t*)&precision;
-    return -(((*ptr) >> 52) - 1023);
-}
-
-uint64_t MLCTWI_GetMantiIndex(double value, int bits){
-    uint64_t* ptr = (uint64_t*)&value;
-    (*ptr) = (*ptr) << 12 >> 12;
-    int shift = 64 - 12 - bits;
-    if(shift > 0){
-        return (*ptr) >> shift;
-    }else{
-        return (*ptr);
-    }
-}
-
-double MLTCWI_RebuildDouble(uint16_t expo, uint64_t manti, int bits){
-    double result = 0;
-    uint64_t *ptr = (uint64_t*)&result;
-    *ptr = expo;
-    (*ptr) = (*ptr) << 52;
-    (*ptr) += (manti << (52-bits));
-    return result;
-}
-
-void MultiLevelCacheTableWideIntervalBuild(struct TopLevelTableWideInterval* topTable, double* precisionTable, int count, double precision, int plus_bits){
-    uint16_t bits = MLCTWI_GetRequiredBits(precision) + plus_bits;
-    topTable->bits = bits;
-    topTable->bottomBoundary = precisionTable[1]/(1+precision);
-    topTable->topBoundary = precisionTable[count-1]/(1-precision);
-    topTable->baseIndex = MLCTWI_GetExpoIndex(topTable->bottomBoundary);
-    topTable->topIndex = MLCTWI_GetExpoIndex(topTable->topBoundary);
-    int subTableCount = topTable->topIndex - topTable->baseIndex + 1;
-    topTable->subTables = (struct SubLevelTableWideInterval*)malloc(sizeof(struct SubLevelTableWideInterval) * subTableCount);
-    memset(topTable->subTables, 0, sizeof(struct SubLevelTableWideInterval) * subTableCount);
-
-    for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--){
-        struct SubLevelTableWideInterval* processingSubTable = &topTable->subTables[i];
-
-        uint32_t maxIndex = 0;
-        for(int j=0; j<bits; j++){
-            maxIndex += 1 << j;
-        }
-        processingSubTable->topIndex = maxIndex;
-        processingSubTable->baseIndex = 0;
-
-        uint64_t subTableLength = processingSubTable->topIndex - processingSubTable-> baseIndex+ 1;
-        processingSubTable->table = (uint16_t*)malloc(sizeof(uint16_t) * subTableLength);
-        memset(processingSubTable->table, 0, sizeof(uint16_t) * subTableLength);
-        processingSubTable->expoIndex = topTable->baseIndex + i;
-    }
-
-
-    uint32_t index = 0;
-    bool flag = false;
-    for(uint16_t i = 0; i<=topTable->topIndex-topTable->baseIndex; i++){
-        struct SubLevelTableWideInterval* processingSubTable = &topTable->subTables[i];
-        uint16_t expoIndex = i+topTable->baseIndex;
-        for(uint32_t j = 0; j<=processingSubTable->topIndex - processingSubTable->baseIndex; j++){
-            uint64_t mantiIndex = j + processingSubTable->baseIndex;
-            double sampleBottom = MLTCWI_RebuildDouble(expoIndex, mantiIndex, topTable->bits);
-            double sampleTop = MLTCWI_RebuildDouble(expoIndex, mantiIndex+1, topTable->bits);
-            double bottomBoundary = precisionTable[index] / (1+precision);
-            double topBoundary = precisionTable[index] / (1-precision);
-            if(sampleTop < topBoundary && sampleBottom > bottomBoundary){
-                processingSubTable->table[j] = index;
-                flag = true;
-            }else{
-                if(flag && index < count-1){
-                    index++;
-                    processingSubTable->table[j] = index;
-                }else{
-                    processingSubTable->table[j] = 0;
-                }
-            }
-        }
-    }
-
-}
-
-uint32_t MultiLevelCacheTableWideIntervalGetIndex(double value, struct TopLevelTableWideInterval* topLevelTable){
-    uint16_t expoIndex = MLCTWI_GetExpoIndex(value);
-    if(expoIndex <= topLevelTable->topIndex && expoIndex >= topLevelTable->baseIndex){
-        struct SubLevelTableWideInterval* subLevelTable = &topLevelTable->subTables[expoIndex-topLevelTable->baseIndex];
-        uint64_t mantiIndex = MLCTWI_GetMantiIndex(value, topLevelTable->bits);
-        return subLevelTable->table[mantiIndex - subLevelTable->baseIndex];
-
-    }
-    return 0;
-}
-
-void MultiLevelCacheTableWideIntervalFree(struct TopLevelTableWideInterval* table){
-    for(int i=0; i<table->topIndex - table->baseIndex + 1; i++){
-        free(table->subTables[i].table);
-    }
-    free(table->subTables);
-}
-
--- a/deps/SZ/sz/src/TypeManager.c
+++ b/deps/SZ/sz/src/TypeManager.c
@ -70,59 +70,36 @@ size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size
 	return byteLength;
 }

-void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)	
+
+void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
 {
-    if(intArrayLength > byteArrayLength*8)
-    {
-    	printf("Error: intArrayLength > byteArrayLength*8\n");
-    	printf("intArrayLength=%zu, byteArrayLength = %zu", intArrayLength, byteArrayLength);
-    	exit(0);
-    }
-	if(intArrayLength>0)
-		*intArray = (unsigned char*)malloc(intArrayLength*sizeof(unsigned char));
-	else
-		*intArray = NULL;    
-    
-	size_t n = 0, i;
-	int tmp;
-	for (i = 0; i < byteArrayLength-1; i++) 
+	if(stepLength > byteArrayLength*4)
 	{
-		tmp = byteArray[i];
-		(*intArray)[n++] = (tmp & 0x80) >> 7;
-		(*intArray)[n++] = (tmp & 0x40) >> 6;
-		(*intArray)[n++] = (tmp & 0x20) >> 5;
-		(*intArray)[n++] = (tmp & 0x10) >> 4;
-		(*intArray)[n++] = (tmp & 0x08) >> 3;
-		(*intArray)[n++] = (tmp & 0x04) >> 2;
-		(*intArray)[n++] = (tmp & 0x02) >> 1;
-		(*intArray)[n++] = (tmp & 0x01) >> 0;		
+		printf("Error: stepLength > byteArray.length*4\n");
+		printf("stepLength=%zu, byteArray.length=%zu\n", stepLength, byteArrayLength);
+		exit(0);
+	}
+	if(stepLength>0)
+		*intArray = (unsigned char*)malloc(stepLength*sizeof(unsigned char));
+	else
+		*intArray = NULL;
+	size_t i, n = 0;
+
+	for (i = 0; i < byteArrayLength; i++) {
+		unsigned char tmp = byteArray[i];
+		(*intArray)[n++] = (tmp & 0xC0) >> 6;
+		if(n==stepLength)
+			break;
+		(*intArray)[n++] = (tmp & 0x30) >> 4;
+		if(n==stepLength)
+			break;
+		(*intArray)[n++] = (tmp & 0x0C) >> 2;
+		if(n==stepLength)
+			break;
+		(*intArray)[n++] = tmp & 0x03;
+		if(n==stepLength)
+			break;
 	}
-	
-	tmp = byteArray[i];	
-	if(n == intArrayLength)
-		return;
-	(*intArray)[n++] = (tmp & 0x80) >> 7;
-	if(n == intArrayLength)
-		return;	
-	(*intArray)[n++] = (tmp & 0x40) >> 6;
-	if(n == intArrayLength)
-		return;	
-	(*intArray)[n++] = (tmp & 0x20) >> 5;
-	if(n == intArrayLength)
-		return;
-	(*intArray)[n++] = (tmp & 0x10) >> 4;
-	if(n == intArrayLength)
-		return;	
-	(*intArray)[n++] = (tmp & 0x08) >> 3;
-	if(n == intArrayLength)
-		return;	
-	(*intArray)[n++] = (tmp & 0x04) >> 2;
-	if(n == intArrayLength)
-		return;	
-	(*intArray)[n++] = (tmp & 0x02) >> 1;
-	if(n == intArrayLength)
-		return;	
-	(*intArray)[n++] = (tmp & 0x01) >> 0;		
 }

 /**
@ -174,195 +151,6 @@ size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t tim
 	return byteLength;
 }

-size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result)
-{
-	size_t i, j, byteLength = 0;
-	if(timeStepTypeLength%4==0)
-		byteLength = timeStepTypeLength*2/8;
-	else
-		byteLength = timeStepTypeLength*2/8+1;
-
-	size_t n = 0;
-	for(i = 0;i<byteLength;i++)
-	{
-		int tmp = 0;
-		/*for(j = 0;j<4&&n<timeStepTypeLength;j++)
-		{
-			int type = timeStepType[n];
-			switch(type)
-			{
-			case 0: 
-				
-				break;
-			case 1:
-				tmp = (tmp | (1 << (6-j*2)));
-				break;
-			case 2:
-				tmp = (tmp | (2 << (6-j*2)));
-				break;
-			case 3:
-				tmp = (tmp | (3 << (6-j*2)));
-				break;
-			default:
-				printf("Error: wrong timestep type...: type[%zu]=%d\n", n, type);
-				exit(0);
-			}
-			n++;
-		}*/
-		for(j = 0;j<4&&n<timeStepTypeLength;j++)
-		{
-			unsigned char type = timeStepType[n];
-			tmp = tmp | type << (6-(j<<1));
-			n++;
-		}
-		result[i] = (unsigned char)tmp;
-	}
-	return byteLength;
-}
-
-void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
-{
-	if(stepLength > byteArrayLength*4)
-	{
-		printf("Error: stepLength > byteArray.length*4\n");
-		printf("stepLength=%zu, byteArray.length=%zu\n", stepLength, byteArrayLength);
-		exit(0);
-	}
-	if(stepLength>0)
-		*intArray = (unsigned char*)malloc(stepLength*sizeof(unsigned char));
-	else
-		*intArray = NULL;
-	size_t i, n = 0;
-
-	for (i = 0; i < byteArrayLength; i++) {
-		unsigned char tmp = byteArray[i];
-		(*intArray)[n++] = (tmp & 0xC0) >> 6;
-		if(n==stepLength)
-			break;
-		(*intArray)[n++] = (tmp & 0x30) >> 4;
-		if(n==stepLength)
-			break;
-		(*intArray)[n++] = (tmp & 0x0C) >> 2;
-		if(n==stepLength)
-			break;
-		(*intArray)[n++] = tmp & 0x03;
-		if(n==stepLength)
-			break;
-	}
-}
-
-size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result)
-{	
-	size_t i = 0, k = 0, byteLength = 0, n = 0;
-	if(timeStepTypeLength%8==0)
-		byteLength = timeStepTypeLength*3/8;
-	else
-		byteLength = timeStepTypeLength*3/8+1;
-
-	if(byteLength>0)
-		*result = (unsigned char*)malloc(byteLength*sizeof(unsigned char));
-	else
-		*result = NULL;
-	int tmp = 0;
-	for(n = 0;n<timeStepTypeLength;n++)
-	{
-		k = n%8;
-		switch(k)
-		{
-		case 0:
-			tmp = tmp | (timeStepType[n] << 5);
-			break;
-		case 1:
-			tmp = tmp | (timeStepType[n] << 2);
-			break;
-		case 2: 
-			tmp = tmp | (timeStepType[n] >> 1);
-			(*result)[i++] = (unsigned char)tmp;
-			tmp = 0 | (timeStepType[n] << 7);
-			break;
-		case 3:
-			tmp = tmp | (timeStepType[n] << 4);
-			break;
-		case 4:
-			tmp = tmp | (timeStepType[n] << 1);
-			break;
-		case 5:
-			tmp = tmp | (timeStepType[n] >> 2);
-			(*result)[i++] = (unsigned char)tmp;
-			tmp = 0 | (timeStepType[n] << 6);
-			break;
-		case 6:
-			tmp = tmp | (timeStepType[n] << 3);
-			break;
-		case 7:
-			tmp = tmp | (timeStepType[n] << 0);
-			(*result)[i++] = (unsigned char)tmp;
-			tmp = 0;
-			break;
-		}
-	}
-	if(k!=7) //load the last one
-		(*result)[i] = (unsigned char)tmp;
-	
-	return byteLength;
-}
-
-void convertByteArray2IntArray_fast_3b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
-{	
-	if(stepLength > byteArrayLength*8/3)
-	{
-		printf("Error: stepLength > byteArray.length*8/3, impossible case unless bugs elsewhere.\n");
-		printf("stepLength=%zu, byteArray.length=%zu\n", stepLength, byteArrayLength);
-		exit(0);		
-	}
-	if(stepLength>0)
-		*intArray = (unsigned char*)malloc(stepLength*sizeof(unsigned char));
-	else
-		*intArray = NULL;
-	size_t i = 0, ii = 0, n = 0;
-	unsigned char tmp = byteArray[i];	
-	for(n=0;n<stepLength;)
-	{
-		switch(n%8)
-		{
-		case 0:
-			(*intArray)[n++] = (tmp & 0xE0) >> 5;
-			break;
-		case 1: 
-			(*intArray)[n++] = (tmp & 0x1C) >> 2;
-			break;
-		case 2:
-			ii = (tmp & 0x03) << 1;
-			i++;
-			tmp = byteArray[i];
-			ii |= (tmp & 0x80) >> 7;
-			(*intArray)[n++] = ii;
-			break;
-		case 3:
-			(*intArray)[n++] = (tmp & 0x70) >> 4;
-			break;
-		case 4:
-			(*intArray)[n++] = (tmp & 0x0E) >> 1;
-			break;
-		case 5:
-			ii = (tmp & 0x01) << 2;
-			i++;
-			tmp = byteArray[i];
-			ii |= (tmp & 0xC0) >> 6;
-			(*intArray)[n++] = ii;
-			break;
-		case 6: 
-			(*intArray)[n++] = (tmp & 0x38) >> 3;
-			break;
-		case 7:
-			(*intArray)[n++] = (tmp & 0x07);
-			i++;
-			tmp = byteArray[i];
-			break;
-		}
-	}
-}
-
 inline int getLeftMovingSteps(size_t k, unsigned char resiBitLength)
 {
 	return 8 - k%8 - resiBitLength;
@ -412,92 +200,4 @@ size_t convertIntArray2ByteArray_fast_dynamic(unsigned char* timeStepType, unsig
 	size_t size = dba->size;
 	free_DBA(dba);
 	return size;
-}
-
-/**
- * 
- * @param timeStepType is the resiMidBits
- * @param resiBitLength is the length of resiMidBits for each element, (the number of resiBitLength == the # of unpredictable elements
- * @return
- */
-size_t convertIntArray2ByteArray_fast_dynamic2(unsigned char* timeStepType, unsigned char* resiBitLength, size_t resiBitLengthLength, unsigned char **bytes)
-{
-	size_t i = 0, j = 0, k = 0; 
-	int value;
-	DynamicByteArray* dba;
-	new_DBA(&dba, 1024);
-	int tmp = 0, leftMovSteps = 0;
-	for(j = 0;j<resiBitLengthLength;j++)
-	{
-		unsigned char rbl = resiBitLength[j];
-		if(rbl==0)
-			continue;
-		value = timeStepType[i];
-		leftMovSteps = getLeftMovingSteps(k, rbl);
-		if(leftMovSteps < 0)
-		{
-			tmp = tmp | (value >> (-leftMovSteps));
-			addDBA_Data(dba, (unsigned char)tmp);
-			tmp = 0 | (value << (8+leftMovSteps));
-		}
-		else if(leftMovSteps > 0)
-		{
-			tmp = tmp | (value << leftMovSteps);
-		}
-		else //==0
-		{
-			tmp = tmp | value;
-			addDBA_Data(dba, (unsigned char)tmp);
-			tmp = 0;
-		}
-		i++;
-		k += rbl;
-	}
-	if(leftMovSteps != 0)
-		addDBA_Data(dba, (unsigned char)tmp);
-	convertDBAtoBytes(dba, bytes);
-	size_t size = dba->size;
-	free_DBA(dba);
-	return size;
-}
-
-int computeBitNumRequired(size_t dataLength)
-{
-	if(exe_params->SZ_SIZE_TYPE==4)
-		return 32 - numberOfLeadingZeros_Int(dataLength);
-	else
-		return 64 - numberOfLeadingZeros_Long(dataLength);
-		
-}
-
-void decompressBitArraybySimpleLZ77(int** result, unsigned char* bytes, size_t bytesLength, size_t totalLength, int validLength)
-{
-	size_t pairLength = (bytesLength*8)/(validLength+1);
-	size_t tmpLength = pairLength*2;
-	int tmpResult[tmpLength];
-	size_t i, j, k = 0;
-	for(i = 0;i<tmpLength;i+=2)
-	{
-		size_t outIndex = k/8;
-		int innerIndex = k%8;
-
-		unsigned char curByte = bytes[outIndex];
-		tmpResult[i] = (curByte >> (8-1-innerIndex)) & 0x01;
-		k++;
-		
-		int numResult = extractBytes(bytes, k, validLength);
-		
-		tmpResult[i+1] = numResult;
-		k = k + validLength;
-	}
-	
-	*result = (int*)malloc(sizeof(int)*totalLength);
-	k = 0;
-	for(i = 0;i<tmpLength;i=i+2)
-	{
-		int state = tmpResult[i];
-		size_t num = tmpResult[i+1];
-		for(j = 0;j<num;j++)
-			(*result)[k++] = state;
-	}
-}
+}
--- a/deps/SZ/sz/src/VarSet.c
+++ b/deps/SZ/sz/src/VarSet.c
@ -1,254 +0,0 @@
-/**
- *  @file Variable.c
- *  @author Sheng Di
- *  @date July, 2016
- *  @brief TypeManager is used to manage the type array: parsing of the bytes and other types in between.
- *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "VarSet.h"
-#include "sz.h"
-
-void free_Variable_keepOriginalData(SZ_Variable* v)
-{
-	if(v->varName!=NULL)
-		free(v->varName);	
-	if(v->compressedBytes!=NULL)
-		free(v->compressedBytes);
-	if(v->multisteps!=NULL)
-		free_multisteps(v->multisteps);	
-	free(v);
-}
-
-/**
- * 
- * @deprecated
- * */
-void free_Variable_keepCompressedBytes(SZ_Variable* v)
-{
-	if(v->varName!=NULL)
-		free(v->varName);
-	if(v->data!=NULL)
-		free(v->data);
-	if(v->multisteps!=NULL)
-		free_multisteps(v->multisteps);	
-	free(v);
-}
-
-void free_Variable_all(SZ_Variable* v)
-{
-	if(v->varName!=NULL)
-		free(v->varName);
-	if(v->data!=NULL)
-		free(v->data);
-	if(v->compressedBytes!=NULL)
-		free(v->compressedBytes);
-	if(v->multisteps!=NULL)
-		free_multisteps(v->multisteps);
-	free(v);
-}
-
-void SZ_batchAddVar(int var_id, char* varName, int dataType, void* data, 
-			int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, 
-			size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
-{	
-	if(sz_varset==NULL)
-	{
-		sz_varset = (SZ_VarSet*)malloc(sizeof(SZ_VarSet));
-		sz_varset->header = (SZ_Variable*)malloc(sizeof(SZ_Variable));
-		sz_varset->header->next = NULL;
-		sz_varset->lastVar = sz_varset->header;
-		sz_varset->count = 0;		
-	}
-	
-	SZ_Variable* var = (SZ_Variable*)malloc(sizeof(SZ_Variable));
-	memset(var, 0, sizeof(SZ_Variable));
-	var->var_id = var_id;
-	var->varName = (char*)malloc(strlen(varName)+1);
-	memcpy(var->varName, varName, strlen(varName)+1);
-	//var->varName = varName;
-	var->dataType = dataType;
-	var->r5 = r5;
-	var->r4 = r4;
-	var->r3 = r3;
-	var->r2 = r2;
-	var->r1 = r1;
-	var->errBoundMode = errBoundMode;
-	var->absErrBound = absErrBound;
-	var->relBoundRatio = relBoundRatio;
-	var->pwRelBoundRatio = pwRelBoundRatio;
-	var->data = data;
-	
-	var->multisteps = (sz_multisteps*)malloc(sizeof(sz_multisteps));
-	memset(var->multisteps, 0, sizeof(sz_multisteps));
-	
-	size_t dataLen = computeDataLength(r5, r4, r3, r2, r1);
-	if(dataType==SZ_FLOAT)
-	{
-		var->multisteps->hist_data = (float*)malloc(sizeof(float)*dataLen);
-		memset(var->multisteps->hist_data, 0, sizeof(float)*dataLen);
-	}
-	else if(dataType==SZ_DOUBLE)
-	{
-		var->multisteps->hist_data = (double*)malloc(sizeof(double)*dataLen);
-		memset(var->multisteps->hist_data, 0, sizeof(double)*dataLen);
-	}
-	var->compressedBytes = NULL;
-	var->next = NULL;
-	
-	sz_varset->count ++;
-	sz_varset->lastVar->next = var;
-	sz_varset->lastVar = var;
-}
-
-int SZ_batchDelVar_ID(int var_id)
-{
-	int state = SZ_batchDelVar_ID_vset(sz_varset, var_id);
-	return state;
-}
-
-int SZ_batchDelVar(char* varName)
-{
-	int state = SZ_batchDelVar_vset(sz_varset, varName);
-	return state;
-}
-
-int SZ_batchDelVar_ID_vset(SZ_VarSet* vset, int var_id)
-{
-	int delSuccess = SZ_FAILED;
-	SZ_Variable* p = vset->header;
-	SZ_Variable* q = p->next;
-	while(q != NULL)
-	{
-		if(q->var_id == var_id)
-		{
-			p->next = q->next;
-			//free_Variable_all(q);
-			free_Variable_keepOriginalData(q);
-			vset->count --;
-			delSuccess = SZ_SUCCESS;
-			if(q->next==NULL) //means that q is the last variable
-				vset->lastVar = p;			
-			break;
-		}
-			
-		p = p->next;
-		q = q->next;	
-	}
-	
-	return delSuccess;	
-}
-
-int SZ_batchDelVar_vset(SZ_VarSet* vset, char* varName)
-{
-	int delSuccess = SZ_FAILED;
-	SZ_Variable* p = vset->header;
-	SZ_Variable* q = p->next;
-	while(q != NULL)
-	{
-		int cmpResult = strcmp(q->varName, varName);
-		if(cmpResult==0)
-		{
-			p->next = q->next;
-			//free_Variable_all(q);
-			free_Variable_keepOriginalData(q);
-			vset->count --;
-			delSuccess = SZ_SUCCESS;
-			break;
-		}
-		p = p->next;
-		q = q->next;	
-	}
-	
-	return delSuccess;
-}
-
-SZ_Variable* SZ_searchVar(char* varName)
-{
-	SZ_Variable* p = sz_varset->header->next;
-	while(p!=NULL)
-	{
-		int checkName = strcmp(p->varName, varName);
-		if(checkName==0)
-			return p;
-		p = p->next;
-	}	
-	return NULL;
-}
-
-void* SZ_getVarData(char* varName, size_t *r5, size_t *r4, size_t *r3, size_t *r2, size_t *r1)
-{
-	SZ_Variable* v = SZ_searchVar(varName);
-	*r5 = v->r5;
-	*r4 = v->r4;
-	*r3 = v->r3;
-	*r2 = v->r2;
-	*r1 = v->r1;
-	return (void*)v->data;
-}
-
-/**
- * 
- * int mode: SZ_MAINTAIN_VAR_DATA, Z_DESTROY_WHOLE_VARSET
- * */
-void SZ_freeVarSet(int mode)
-{
-	free_VarSet_vset(sz_varset, mode);
-}
-
-//free_VarSet will completely destroy the SZ_VarSet, so don't do it until you really don't need it any more!
-/**
- * 
- * int mode: SZ_MAINTAIN_VAR_DATA, Z_DESTROY_WHOLE_VARSET
- * */
-void free_VarSet_vset(SZ_VarSet *vset, int mode)
-{
-	if(vset==NULL)
-		return;
-	SZ_Variable *p = vset->header;
-	while(p->next!=NULL)
-	{
-		SZ_Variable *q = p->next;
-		p->next = q->next;
-		if(mode==SZ_MAINTAIN_VAR_DATA)
-			free_Variable_keepOriginalData(q);
-		else if(mode==SZ_DESTROY_WHOLE_VARSET)
-			free_Variable_all(q);
-	}
-	free(sz_varset->header);
-	free(vset);
-}
-
-void free_multisteps(sz_multisteps* multisteps)
-{
-	if(multisteps->hist_data!=NULL)
-		free(multisteps->hist_data);
-	free(multisteps);
-}
-
-inline int checkVarID(unsigned char cur_var_id, unsigned char* var_ids, int var_count)
-{
-	int j = 0;
-	for(j=0;j<var_count;j++)
-	{
-		if(var_ids[j]==cur_var_id)
-			return 1;
-	}
-	return 0;
-}
-
-SZ_Variable* SZ_getVariable(int var_id)
-{
-	SZ_Variable* p = sz_varset->header->next;
-	while(p!=NULL)
-	{
-		if(var_id == p->var_id)
-			return p;
-		p = p->next;
-	}	
-	return NULL;
-} 
--- a/deps/SZ/sz/src/conf.c
+++ b/deps/SZ/sz/src/conf.c
@ -375,15 +375,7 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 		//initialization for Huffman encoding
 		//SZ_Reset();	
 	}
-	else if(confparams_cpr->sol_ID == PASTRI)
-	{//load parameters for PSTRI
-		pastri_par.bf[0] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_0", 0);		
-		pastri_par.bf[1] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_1", 0);		
-		pastri_par.bf[2] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_2", 0);		
-		pastri_par.bf[3] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_3", 0);
-		pastri_par.numBlocks = (int)iniparser_getint(ini, "PARAMETER:numBlocks", 0);		
-		confparams_cpr->absErrBound = pastri_par.originalEb = (double)iniparser_getdouble(ini, "PARAMETER:absErrBound", 1E-3);
-	}
+
 	
    iniparser_freedict(ini);
    return SZ_SUCCESS;
@ -408,31 +400,3 @@ int SZ_LoadConf(const char* sz_cfgFile) {
    }
    return SZ_SUCCESS;
 }
-
-int checkVersion(unsigned char version)
-{
-	return version <= versionNumber;
-}
-
-
-void initSZ_TSC()
-{
-	sz_tsc = (sz_tsc_metadata*)malloc(sizeof(sz_tsc_metadata));
-	memset(sz_tsc, 0, sizeof(sz_tsc_metadata));
-	/*sprintf(sz_tsc->metadata_filename, "sz_tsc_metainfo.txt");
-	sz_tsc->metadata_file = fopen(sz_tsc->metadata_filename, "wb");
-	if (sz_tsc->metadata_file == NULL)
-	{
-		printf("Failed to open sz_tsc_metainfo.txt file for writing metainfo.\n");
-		exit(1);
-	}
-	fputs("#metadata of the time-step based compression\n", sz_tsc->metadata_file);	*/
-}
-
-/*double fabs(double value)
-{
-	if(value<0)
-		return -value;
-	else
-		return value;
-}*/
--- a/deps/SZ/sz/src/pastri.c
+++ b/deps/SZ/sz/src/pastri.c
@ -1,87 +0,0 @@
-#include "pastri.h"
-#include "pastriD.h"
-#include "pastriF.h"
-
-void SZ_pastriReadParameters(char paramsFilename[512],pastri_params *paramsPtr){
-  FILE *paramsF;
-  paramsF=fopen(paramsFilename,"r");
-  
-  if(paramsF==NULL){
-    printf("ERROR: Parameters file cannot be opened.\n");
-    printf("Filename: %s\n",paramsFilename);
-    assert(0);
-  }
-  
-  fscanf(paramsF,"%d %d %d %d %lf %d %d",&paramsPtr->bf[0],&paramsPtr->bf[1],&paramsPtr->bf[2],&paramsPtr->bf[3],&paramsPtr->originalEb,&paramsPtr->dataSize,&paramsPtr->numBlocks);
-  //printf("Params: %d %d %d %d %.3e %d\n",paramsPtr->bf[0],paramsPtr->bf[1],paramsPtr->bf[2],paramsPtr->bf[3],paramsPtr->originalEb,paramsPtr->numBlocks);
-  fclose(paramsF);
-}
-
-void SZ_pastriPreprocessParameters(pastri_params *p){
-  //Preprocess by calculating some pastri_params:
-  //Calculate sbSize, sbNum, etc.:
-  p->idxRange[0]=(p->bf[0]+1)*(p->bf[0]+2)/2;
-  p->idxRange[1]=(p->bf[1]+1)*(p->bf[1]+2)/2;
-  p->idxRange[2]=(p->bf[2]+1)*(p->bf[2]+2)/2;
-  p->idxRange[3]=(p->bf[3]+1)*(p->bf[3]+2)/2;
-  p->sbSize=p->idxRange[2]*p->idxRange[3];
-  p->sbNum=p->idxRange[0]*p->idxRange[1];
-  p->bSize=p->sbSize*p->sbNum;
-  p->usedEb=p->originalEb*0.999;  //This is needed just to eliminate some rounding errors. It has almost no effect on compression rate/ratios.
-}
-
-void SZ_pastriCompressBatch(pastri_params *p,unsigned char *originalBuf, unsigned char** compressedBufP,size_t *compressedBytes){
-  (*compressedBufP) = (unsigned char*)calloc(p->numBlocks*p->bSize*p->dataSize,sizeof(char));
-  int bytes; //bytes for this block
-  int i;
-  size_t bytePos=0; //Current byte pos in the outBuf
-  
-  memcpy(*compressedBufP, p, sizeof(pastri_params));
-  bytePos+=sizeof(pastri_params);
-  
-  for(i=0;i<p->numBlocks;i++){
-    if(p->dataSize==8){
-      pastri_double_Compress(originalBuf + (i*p->bSize*p->dataSize),p,(*compressedBufP) + bytePos,&bytes);
-    }else if(p->dataSize==4){
-      pastri_float_Compress(originalBuf + (i*p->bSize*p->dataSize),p,(*compressedBufP) + bytePos,&bytes);
-    }
-    bytePos+=bytes;
-    //printf("bytes:%d\n",bytes);
-  }
-  *compressedBytes=bytePos;
-  //printf("totalBytesWritten:%d\n",*compressedBytes);
-}
-
-void SZ_pastriDecompressBatch(unsigned char*compressedBuf, pastri_params *p, unsigned char** decompressedBufP ,size_t *decompressedBytes){
-  int bytePos=0; //Current byte pos in the outBuf 
-  memcpy(p, compressedBuf, sizeof(pastri_params));
-  bytePos+=sizeof(pastri_params);	
-	
-  (*decompressedBufP) = (unsigned char*)malloc(p->numBlocks*p->bSize*p->dataSize*sizeof(char)); 
-  int bytes; //bytes for this block
-  int i;
-  
-  for(i=0;i<p->numBlocks;i++){
-    if(p->dataSize==8){
-      pastri_double_Decompress(compressedBuf + bytePos,p->dataSize,p,(*decompressedBufP) + (i*p->bSize*p->dataSize),&bytes);
-    }else if(p->dataSize==4){
-      pastri_float_Decompress(compressedBuf + bytePos,p->dataSize,p,(*decompressedBufP) + (i*p->bSize*p->dataSize),&bytes);
-    }
-          
-    bytePos += bytes;
-    //printf("bytes:%d\n",bytes);
-  }
-  //printf("totalBytesRead:%d\n",bytePos);
-  *decompressedBytes=p->numBlocks*p->bSize*p->dataSize;
-}
-
-void SZ_pastriCheckBatch(pastri_params *p,unsigned char*originalBuf,unsigned char*decompressedBuf){        
-  int i;
-  for(i=0;i<p->numBlocks;i++){
-    if(p->dataSize==8){
-      pastri_double_Check(originalBuf+(i*p->bSize*p->dataSize),p->dataSize,decompressedBuf+(i*p->bSize*p->dataSize),p);
-    }else if(p->dataSize==4){
-      pastri_float_Check(originalBuf+(i*p->bSize*p->dataSize),p->dataSize,decompressedBuf+(i*p->bSize*p->dataSize),p);
-    }
-  }
-}
--- a/deps/SZ/sz/src/rw.c
+++ b/deps/SZ/sz/src/rw.c
--- a/deps/SZ/sz/src/rw_interface.F90
+++ b/deps/SZ/sz/src/rw_interface.F90
@ -1,205 +0,0 @@
-!  @file   sdc_interface.F90
-!  @author Sheng Di (disheng222@gmail.com)
-!  @date   Aug., 2014
-!  @ Mathematics and Computer Science (MCS)
-!  @ Argonne National Laboratory, Lemont, USA.
-!  @brief  The key Fortran binding file to connect C language and Fortran (Fortran part)
-
-
-MODULE RW
-	use :: ISO_C_BINDING
-
-	INTERFACE writeData
-		MODULE PROCEDURE WriteData_inBinary_d1_INTEGER_K1
-		MODULE PROCEDURE WriteData_inBinary_d1_REAL_K4
-		MODULE PROCEDURE WriteData_inBinary_d2_REAL_K4
-		MODULE PROCEDURE WriteData_inBinary_d3_REAL_K4
-		MODULE PROCEDURE WriteData_inBinary_d4_REAL_K4
-		MODULE PROCEDURE WriteData_inBinary_d5_REAL_K4
-		MODULE PROCEDURE WriteData_inBinary_d1_REAL_K8
-		MODULE PROCEDURE WriteData_inBinary_d2_REAL_K8
-		MODULE PROCEDURE WriteData_inBinary_d3_REAL_K8
-		MODULE PROCEDURE WriteData_inBinary_d4_REAL_K8
-		MODULE PROCEDURE WriteData_inBinary_d5_REAL_K8
-	END INTERFACE writeData
-
-	INTERFACE readData
-		MODULE PROCEDURE readByteData
-		MODULE PROCEDURE readFloatData
-		MODULE PROCEDURE readDoubleData
-	END INTERFACE readData
-
-	CONTAINS
-
-	!Bytes here could be an "allocatable" array, so it requires an extra "byteLength" io indicate the length (can't use size(Bytes))
-	SUBROUTINE WriteData_inBinary_d1_INTEGER_K1(Bytes, byteLength, FILE_PATH)
-		implicit none
-		INTEGER(KIND=1), DIMENSION(:) :: Bytes
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER(KIND=C_SIZE_T) :: byteLength
-
-		CALL writeByteFile(Bytes, byteLength, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d1_INTEGER_K1
-
-	SUBROUTINE WriteData_inBinary_d1_REAL_K4(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=4), DIMENSION(:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeFloatFile(VAR, nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d1_REAL_K4
-
-	SUBROUTINE WriteData_inBinary_d2_REAL_K4(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=4), DIMENSION(:,:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeFloatFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d2_REAL_K4
-
-	SUBROUTINE WriteData_inBinary_d3_REAL_K4(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=4), DIMENSION(:,:,:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeFloatFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d3_REAL_K4
-
-	SUBROUTINE WriteData_inBinary_d4_REAL_K4(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=4), DIMENSION(:,:,:,:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeFloatFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d4_REAL_K4
-
-	SUBROUTINE WriteData_inBinary_d5_REAL_K4(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=4), DIMENSION(:,:,:,:,:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeFloatFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d5_REAL_K4
-
-!write data in binary for K8 data
-
-	SUBROUTINE WriteData_inBinary_d1_REAL_K8(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=8), DIMENSION(:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeDoubleFile(VAR, nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d1_REAL_K8
-
-	SUBROUTINE WriteData_inBinary_d2_REAL_K8(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=8), DIMENSION(:,:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeDoubleFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d2_REAL_K8
-
-	SUBROUTINE WriteData_inBinary_d3_REAL_K8(VAR, FILE_PATH)
-		implicit none
-		REAL(KIND=8), DIMENSION(:,:,:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeDoubleFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d3_REAL_K8
-
-	SUBROUTINE WriteData_inBinary_d4_REAL_K8(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=8), DIMENSION(:,:,:,:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeDoubleFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d4_REAL_K8
-
-	SUBROUTINE WriteData_inBinary_d5_REAL_K8(VAR, nbEle, FILE_PATH)
-		implicit none
-		REAL(KIND=8), DIMENSION(:,:,:,:,:) :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER :: nbEle
-
-		CALL writeDoubleFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
-	END SUBROUTINE WriteData_inBinary_d5_REAL_K8
-
-!Check file size
-	SUBROUTINE checkFileSize(FILE_PATH, BYTESIZE)
-		implicit none
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER(kind=C_SIZE_T) :: BYTESIZE
-
-		CALL checkFileSizeC(FILE_PATH, len(trim(FILE_PATH)), BYTESIZE)
-	END SUBROUTINE checkFileSize
-
-!Read data
-	SUBROUTINE readByteData(FILE_PATH, Bytes, outSize)
-		implicit none
-		INTEGER(KIND=1), DIMENSION(:), allocatable :: temp
-		INTEGER(KIND=1), DIMENSION(:), allocatable :: Bytes
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER(kind=C_SIZE_T) :: COUNTER
-		INTEGER(kind=C_SIZE_T), intent(out) :: outSize !in bytes
-		
-		CALL checkFileSize(FILE_PATH, outSize)
-		allocate(temp(outSize))
-
-		CALL readByteFile(FILE_PATH, len(trim(FILE_PATH)), temp, outSize)
-		allocate(Bytes(outSize))
-		DO COUNTER=1,outSize,1
-			Bytes(COUNTER) = temp(COUNTER)
-		END DO
-		deallocate(temp)
-	END SUBROUTINE readByteData
-
-	SUBROUTINE readFloatData(FILE_PATH, VAR, nbEle)
-		implicit none
-		REAL(KIND=4), DIMENSION(:), allocatable :: temp
-		REAL(KIND=4), DIMENSION(:), allocatable :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER(kind=C_SIZE_T) :: COUNTER, fileSize
-		INTEGER(kind=C_SIZE_T), intent(out) :: nbEle
-
-		CALL checkFileSize(FILE_PATH, fileSize)
-		nbEle = fileSize/4
-		allocate(temp(nbEle))
-		
-		CALL readFloatFile(FILE_PATH, len(trim(FILE_PATH)), temp, nbEle)
-		allocate(VAR(nbEle))
-		DO COUNTER=1,fileSize,1
-			VAR(COUNTER) = temp(COUNTER)
-		END DO		
-		deallocate(temp)
-	END SUBROUTINE readFloatData
-
-	SUBROUTINE readDoubleData(FILE_PATH, VAR, nbEle)
-		implicit none
-		REAL(KIND=8), DIMENSION(:), allocatable :: temp
-		REAL(KIND=8), DIMENSION(:), allocatable :: VAR
-		CHARACTER(LEN=*) :: FILE_PATH
-		INTEGER(kind=C_SIZE_T) :: COUNTER, fileSize
-		INTEGER(kind=C_SIZE_T), intent(out) :: nbEle
-
-		CALL checkFileSize(FILE_PATH, fileSize)
-		nbEle = fileSize/8
-		allocate(temp(nbEle))
-	
-		CALL readDoubleFile(FILE_PATH, len(trim(FILE_PATH)), temp, nbEle)
-		allocate(VAR(nbEle))
-		DO COUNTER=1,fileSize,1
-			VAR(COUNTER) = temp(COUNTER)
-		END DO		
-		deallocate(temp)		
-	END SUBROUTINE readDoubleData
-
-END MODULE RW
--- a/deps/SZ/sz/src/rwf.c
+++ b/deps/SZ/sz/src/rwf.c
@ -1,96 +0,0 @@
-/**
- *  @file rw.c
- *  @author Sheng Di
- *  @date April, 2015
- *  @brief io interface for fortrance
- *  (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "rw.h"
-
-void checkfilesizec_(char *srcFilePath, int *len, size_t *filesize)
-{
-	int i; 
-	int status;
-	char s[*len+1];
-	for(i=0;i<*len;i++)
-		s[i]=srcFilePath[i];
-	s[*len]='\0';
-	*filesize = checkFileSize(s, &status);
-}
-
-void readbytefile_(char *srcFilePath, int *len, unsigned char *bytes, size_t *byteLength)
-{
-	size_t i; 
-	int ierr;
-    char s[*len+1];
-    for(i=0;i<*len;i++)
-        s[i]=srcFilePath[i];
-    s[*len]='\0';
-    unsigned char *tmp_bytes = readByteData(s, byteLength, &ierr);
-    memcpy(bytes, tmp_bytes, *byteLength);
-    free(tmp_bytes);
-}
-
-void readdoublefile_(char *srcFilePath, int *len, double *data, size_t *nbEle)
-{
-	size_t i; 
-	int ierr;
-    char s[*len+1];
-    for(i=0;i<*len;i++)
-        s[i]=srcFilePath[i];
-    s[*len]='\0';	
-	double *tmp_data = readDoubleData(s, nbEle, &ierr);
-	memcpy(data, tmp_data, *nbEle);
-	free(tmp_data);
-}
-
-void readfloatfile_(char *srcFilePath, int *len, float *data, size_t *nbEle)
-{
-	size_t i; 
-	int ierr;
-    char s[*len+1];
-    for(i=0;i<*len;i++)
-        s[i]=srcFilePath[i];
-    s[*len]='\0';
-	float *tmp_data = readFloatData(s, nbEle, &ierr);
-	memcpy(data, tmp_data, *nbEle);
-	free(tmp_data);
-}
-
-void writebytefile_(unsigned char *bytes, size_t *byteLength, char *tgtFilePath, int *len)
-{
-	size_t i; 
-	int ierr;
-    char s[*len+1];
-    for(i=0;i<*len;i++)
-        s[i]=tgtFilePath[i];
-    s[*len]='\0';
-	writeByteData(bytes, *byteLength, s, &ierr);
-}
-
-void writedoublefile_(double *data, size_t *nbEle, char *tgtFilePath, int *len)
-{
-	size_t i;
-	int ierr;
-    char s[*len+1];
-    for(i=0;i<*len;i++)
-        s[i]=tgtFilePath[i];
-    s[*len]='\0';	
-	writeDoubleData(data, *nbEle, s, &ierr);
-}
-
-void writefloatfile_(float *data, size_t *nbEle, char *tgtFilePath, int *len)
-{
-	size_t i; 
-	int ierr;
-    char s[*len+1];
-    for(i=0;i<*len;i++)
-        s[i]=tgtFilePath[i];
-    s[*len]='\0';
-	writeFloatData(data, *nbEle, s, &ierr);
-}
--- a/deps/SZ/sz/src/sz.c
+++ b/deps/SZ/sz/src/sz.c
@ -18,7 +18,6 @@
 #include "TightDataPointStorageD.h"
 #include "TightDataPointStorageF.h"
 #include "zlib.h"
-#include "rw.h"
 #include "conf.h"
 #include "utility.h"

@ -36,16 +35,6 @@ sz_params *confparams_dec = NULL; //used for decompression

 sz_exedata *exe_params = NULL;

-/*following global variables are desgined for time-series based compression*/
-/*sz_varset is not used in the single-snapshot data compression*/
-SZ_VarSet* sz_varset = NULL;
-sz_multisteps *multisteps = NULL;
-sz_tsc_metadata *sz_tsc = NULL;
-
-//only for Pastri compressor
-#ifdef PASTRI
-pastri_params pastri_par;
-#endif



@ -64,10 +53,6 @@ int SZ_Init(const char *configFilePath)
 		return SZ_FAILED;
 	
 	exe_params->SZ_SIZE_TYPE = SZ_SIZE_TYPE_DEFUALT;
-	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
-	{
-		initSZ_TSC();
-	}
 	return SZ_SUCCESS;
 }

--- a/deps/SZ/sz/src/sz_double.c
+++ b/deps/SZ/sz/src/sz_double.c
@ -21,10 +21,7 @@
 #include "sz_double.h"
 #include "szd_double.h"
 #include "zlib.h"
-#include "rw.h"
 #include "utility.h"
-#include "CacheTable.h"
-#include "MultiLevelCacheTableWideInterval.h"
 #include "sz_stats.h"

 unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize)
--- a/deps/SZ/sz/src/sz_float.c
+++ b/deps/SZ/sz/src/sz_float.c
@ -21,22 +21,11 @@
 #include "sz_float.h"
 #include "szd_float.h"
 #include "zlib.h"
-#include "rw.h"
 #include "utility.h"
-#include "CacheTable.h"
-#include "MultiLevelCacheTableWideInterval.h"
-#include "sz_stats.h"

 # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))


-unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize)
-{
-	*outSize = dataLength*sizeof(float);
-	unsigned char* out = (unsigned char*)malloc(dataLength*sizeof(float));
-	memcpy(out, data, dataLength*sizeof(float));
-	return out;
-}

 void computeReqLength_float(double realPrecision, short rangeExpo, int* reqLength, float* medianValue)
 {
--- a/deps/SZ/sz/src/sz_interface.F90
+++ b/deps/SZ/sz/src/sz_interface.F90
--- a/deps/SZ/sz/src/sz_stats.c
+++ b/deps/SZ/sz/src/sz_stats.c
@ -1,60 +0,0 @@
-#include <sz_stats.h>
-
-sz_stats sz_stat;
-
-void writeBlockInfo(int use_mean, size_t blockSize, size_t regressionBlocks, size_t totalBlocks)
-{
-	sz_stat.use_mean = use_mean;
-	sz_stat.blockSize = blockSize;
-	sz_stat.lorenzoBlocks = totalBlocks - regressionBlocks;
-	sz_stat.regressionBlocks = regressionBlocks;
-	sz_stat.totalBlocks = totalBlocks;
-	sz_stat.lorenzoPercent = 1.0f*sz_stat.lorenzoBlocks/(float)totalBlocks;
-	sz_stat.regressionPercent = 1.0f*regressionBlocks/(float)totalBlocks;
-}
-
-void writeHuffmanInfo(size_t huffmanTreeSize, size_t huffmanCodingSize, size_t totalDataSize, int huffmanNodeCount)
-{
-	sz_stat.huffmanTreeSize = huffmanTreeSize;
-	sz_stat.huffmanCodingSize = huffmanCodingSize;
-	sz_stat.huffmanCompressionRatio = 1.0f*totalDataSize/(huffmanTreeSize+huffmanCodingSize);
-	sz_stat.huffmanNodeCount = huffmanNodeCount;
-}
-
-void writeZstdCompressionRatio(float zstdCompressionRatio)
-{
-	sz_stat.zstdCompressionRatio = zstdCompressionRatio;
-}	
-
-
-void writeUnpredictDataCounts(size_t unpredictCount, size_t totalNumElements)
-{
-	sz_stat.unpredictCount = unpredictCount;
-	sz_stat.unpredictPercent = 1.0f*unpredictCount/totalNumElements;
-}
-
-void printSZStats()
-{
-	printf("===============stats about sz================\n");
-	if(sz_stat.use_mean)
-		printf("use_mean:                  YES\n");
-	else
-		printf("use_mean:                  NO\n");
-		
-	printf("blockSize                  %zu\n", sz_stat.blockSize);
-	printf("lorenzoPercent             %f\n", sz_stat.lorenzoPercent);
-	printf("regressionPercent          %f\n", sz_stat.regressionPercent);
-	printf("lorenzoBlocks              %zu\n", sz_stat.lorenzoBlocks);
-	printf("regressionBlocks           %zu\n", sz_stat.regressionBlocks);
-	printf("totalBlocks                %zu\n", sz_stat.totalBlocks);
-	
-	printf("huffmanTreeSize            %zu\n", sz_stat.huffmanTreeSize);
-	printf("huffmanCodingSize          %zu\n", sz_stat.huffmanCodingSize);
-	printf("huffmanCompressionRatio    %f\n", sz_stat.huffmanCompressionRatio);
-	printf("huffmanNodeCount           %d\n", sz_stat.huffmanNodeCount);
-	
-	//printf("zstdCompressionRatio       %f\n", sz_stat.zstdCompressionRatio);
-
-	printf("unpredictCount             %zu\n", sz_stat.unpredictCount);
-	printf("unpredictPercent           %f\n", sz_stat.unpredictPercent);
-}