reduce files to smaller and delete no use files and function

This commit is contained in:
tickduan 2021-07-07 09:52:18 +08:00
parent 7c1ae052ef
commit f9bf4244ce
38 changed files with 33 additions and 9139 deletions

View File

@ -1,93 +0,0 @@
#AM_CFLAGS = -I./include -I../zlib
#LDFLAGS=-fPIC -shared
AUTOMAKE_OPTIONS=foreign
if FORTRAN
include_HEADERS=include/MultiLevelCacheTable.h include/MultiLevelCacheTableWideInterval.h include/CacheTable.h include/defines.h\
include/CompressElement.h include/DynamicDoubleArray.h include/rw.h include/conf.h include/dataCompression.h\
include/dictionary.h include/DynamicFloatArray.h include/VarSet.h include/sz.h include/Huffman.h include/ByteToolkit.h include/szf.h\
include/sz_float.h include/sz_double.h include/callZlib.h include/iniparser.h include/TypeManager.h\
include/sz_int8.h include/sz_int16.h include/sz_int32.h include/sz_int64.h include/szd_int8.h include/szd_int16.h include/szd_int32.h include/szd_int64.h\
include/sz_uint8.h include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h include/szd_uint64.h\
include/sz_float_pwr.h include/sz_double_pwr.h include/szd_float.h include/szd_double.h include/szd_float_pwr.h include/szd_double_pwr.h\
include/sz_float_ts.h include/szd_float_ts.h include/sz_double_ts.h include/szd_double_ts.h include/utility.h include/sz_opencl.h\
include/DynamicByteArray.h include/DynamicIntArray.h include/TightDataPointStorageI.h include/TightDataPointStorageD.h include/TightDataPointStorageF.h\
include/pastriD.h include/pastriF.h include/pastriGeneral.h include/pastri.h include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h include/sz_stats.h sz.mod rw.mod
lib_LTLIBRARIES=libSZ.la
libSZ_la_CFLAGS=-I./include -I../zlib/ -I../zstd/
if TIMECMPR
libSZ_la_CFLAGS+=-DHAVE_TIMECMPR
endif
if RANDOMACCESS
libSZ_la_CFLAGS+=-DHAVE_RANDOMACCESS
endif
if OPENMP
libSZ_la_CFLAGS+=-fopenmp
endif
libSZ_la_LDFLAGS = -version-info 2:1:0
libSZ_la_LIDADD=../zlib/.libs/libzlib.a ../zstd/.libs/libzstd.a
libSZ_la_SOURCES=src/MultiLevelCacheTable.c src/MultiLevelCacheTableWideInterval.c \
src/ByteToolkit.c src/dataCompression.c src/DynamicIntArray.c src/iniparser.c src/szf.c \
src/CompressElement.c src/DynamicByteArray.c src/rw.c src/utility.c\
src/TightDataPointStorageI.c src/TightDataPointStorageD.c src/TightDataPointStorageF.c \
src/conf.c src/DynamicDoubleArray.c src/rwf.c src/TypeManager.c \
src/dictionary.c src/DynamicFloatArray.c src/VarSet.c src/callZlib.c src/Huffman.c \
src/sz_float.c src/sz_double.c src/sz_int8.c src/sz_int16.c src/sz_int32.c src/sz_int64.c\
src/sz_uint8.c src/sz_uint16.c src/sz_uint32.c src/sz_uint64.c src/szd_uint8.c src/szd_uint16.c src/szd_uint32.c src/szd_uint64.c\
src/szd_float.c src/szd_double.c src/szd_int8.c src/szd_int16.c src/szd_int32.c src/szd_int64.c src/sz.c\
src/sz_float_pwr.c src/sz_double_pwr.c src/szd_float_pwr.c src/szd_double_pwr.c src/ArithmeticCoding.c src/CacheTable.c\
src/sz_interface.F90 src/rw_interface.F90 src/exafelSZ.c
libSZ_la_LINK=$(AM_V_CC)$(LIBTOOL) --tag=FC --mode=link $(FCLD) $(libSZ_la_CFLAGS) -O3 $(libSZ_la_LDFLAGS) -o $(lib_LTLIBRARIES)
else
include_HEADERS=include/MultiLevelCacheTable.h include/MultiLevelCacheTableWideInterval.h include/CacheTable.h include/defines.h\
include/CompressElement.h include/DynamicDoubleArray.h include/rw.h include/conf.h include/dataCompression.h\
include/dictionary.h include/DynamicFloatArray.h include/VarSet.h include/sz.h include/Huffman.h include/ByteToolkit.h\
include/sz_float.h include/sz_double.h include/callZlib.h include/iniparser.h include/TypeManager.h\
include/sz_int8.h include/sz_int16.h include/sz_int32.h include/sz_int64.h include/szd_int8.h include/szd_int16.h include/szd_int32.h include/szd_int64.h\
include/sz_uint8.h include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h include/szd_uint64.h\
include/sz_float_pwr.h include/sz_double_pwr.h include/szd_float.h include/szd_double.h include/szd_float_pwr.h include/szd_double_pwr.h\
include/sz_float_ts.h include/szd_float_ts.h include/sz_double_ts.h include/szd_double_ts.h include/utility.h include/sz_opencl.h\
include/DynamicByteArray.h include/DynamicIntArray.h include/TightDataPointStorageI.h include/TightDataPointStorageD.h include/TightDataPointStorageF.h\
include/pastriD.h include/pastriF.h include/pastriGeneral.h include/pastri.h include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h include/sz_stats.h
lib_LTLIBRARIES=libSZ.la
libSZ_la_CFLAGS=-I./include -I../zlib -I../zstd/
if WRITESTATS
libSZ_la_CFLAGS+=-DHAVE_WRITESTATS
endif
if TIMECMPR
libSZ_la_CFLAGS+=-DHAVE_TIMECMPR
endif
if RANDOMACCESS
libSZ_la_CFLAGS+=-DHAVE_RANDOMACCESS
endif
if OPENMP
libSZ_la_CFLAGS+=-fopenmp
endif
libSZ_la_LDFLAGS = -version-info 1:4:0
libSZ_la_LIDADD=../zlib/.libs/libzlib.a ../zlib/.libs/libzstd.a
libSZ_la_SOURCES=src/MultiLevelCacheTable.c src/MultiLevelCacheTableWideInterval.c \
src/ByteToolkit.c src/dataCompression.c src/DynamicIntArray.c src/iniparser.c\
src/CompressElement.c src/DynamicByteArray.c src/rw.c src/utility.c\
src/TightDataPointStorageI.c src/TightDataPointStorageD.c src/TightDataPointStorageF.c \
src/conf.c src/DynamicDoubleArray.c src/TypeManager.c \
src/dictionary.c src/DynamicFloatArray.c src/VarSet.c src/callZlib.c src/Huffman.c \
src/sz_float.c src/sz_double.c src/sz_int8.c src/sz_int16.c src/sz_int32.c src/sz_int64.c\
src/sz_uint8.c src/sz_uint16.c src/sz_uint32.c src/sz_uint64.c src/szd_uint8.c src/szd_uint16.c src/szd_uint32.c src/szd_uint64.c\
src/szd_float.c src/szd_double.c src/szd_int8.c src/szd_int16.c src/szd_int32.c src/szd_int64.c src/sz.c\
src/sz_float_pwr.c src/sz_double_pwr.c src/szd_float_pwr.c src/szd_double_pwr.c src/ArithmeticCoding.c src/exafelSZ.c src/CacheTable.c
if PASTRI
libSZ_la_SOURCES+=src/pastri.c
endif
if OPENMP
libSZ_la_SOURCES+=src/sz_omp.c
endif
if TIMECMPR
libSZ_la_SOURCES+=src/sz_float_ts.c src/szd_float_ts.c src/sz_double_ts.c src/szd_double_ts.c
endif
if WRITESTATS
libSZ_la_SOURCES+=src/sz_stats.c
endif
libSZ_la_LINK= $(AM_V_CC)$(LIBTOOL) --tag=CC --mode=link $(CCLD) $(libSZ_la_CFLAGS) -O3 $(libSZ_la_LDFLAGS) -o $(lib_LTLIBRARIES)
endif

1729
deps/SZ/sz/Makefile.in vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,62 +0,0 @@
/**
* @file ArithmeticCoding.h
* @author Sheng Di
* @date Dec, 2018
* @brief Header file for the ArithmeticCoding.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _ArithmeticCoding_H
#define _ArithmeticCoding_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#define ONE_FOURTH (0x40000000000) //44 bits are absolutely enough to deal with a large dataset (support at most 16TB per process)
#define ONE_HALF (0x80000000000)
#define THREE_FOURTHS (0xC0000000000)
#define MAX_CODE (0xFFFFFFFFFFF)
#define MAX_INTERVALS 1048576 //the limit to the arithmetic coding (at most 2^(20) intervals)
typedef struct Prob {
size_t low;
size_t high;
int state;
} Prob;
typedef struct AriCoder
{
int numOfRealStates; //the # real states menas the number of states after the optimization of # intervals
int numOfValidStates; //the # valid states means the number of non-zero frequency cells (some states/codes actually didn't appear)
size_t total_frequency;
Prob* cumulative_frequency; //used to encode data more efficiencly
} AriCoder;
void output_bit_1(unsigned int* buf);
void output_bit_0(unsigned int* buf);
unsigned int output_bit_1_plus_pending(int pending_bits);
unsigned int output_bit_0_plus_pending(int pending_bits);
AriCoder *createAriCoder(int numOfStates, int *s, size_t length);
void freeAriCoder(AriCoder *ariCoder);
void ari_init(AriCoder *ariCoder, int *s, size_t length);
unsigned int pad_ariCoder(AriCoder* ariCoder, unsigned char** out);
int unpad_ariCoder(AriCoder** ariCoder, unsigned char* bytes);
unsigned char get_bit(unsigned char* p, int offset);
void ari_encode(AriCoder *ariCoder, int *s, size_t length, unsigned char *out, size_t *outSize);
void ari_decode(AriCoder *ariCoder, unsigned char *s, size_t s_len, size_t targetLength, int *out);
Prob* getCode(AriCoder *ariCoder, size_t scaled_value);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _ArithmeticCoding_H ----- */

View File

@ -1,40 +0,0 @@
/**
* @file CacheTable.h
* @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
* @date Jan, 2019
* @brief Header file.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef SZ_MASTER_CACHETABLE_H
#define SZ_MASTER_CACHETABLE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "stdio.h"
#include "stdint.h"
#include <math.h>
extern double* g_CacheTable;
extern uint32_t * g_InverseTable;
extern uint32_t baseIndex;
extern uint32_t topIndex;
extern int bits;
int doubleGetExpo(double d);
int CacheTableGetRequiredBits(double precision, int quantization_intervals);
uint32_t CacheTableGetIndex(float value, int bits);
uint64_t CacheTableGetIndexDouble(double value, int bits);
int CacheTableIsInBoundary(uint32_t index);
void CacheTableBuild(double * table, int count, double smallest, double largest, double precision, int quantization_intervals);
uint32_t CacheTableFind(uint32_t index);
void CacheTableFree();
#ifdef __cplusplus
}
#endif
#endif //SZ_MASTER_CACHETABLE_H

View File

@ -1,36 +0,0 @@
/**
* @file DynamicDoubleArray.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Dynamic Double Array.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DynamicDoubleArray_H
#define _DynamicDoubleArray_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
typedef struct DynamicDoubleArray
{
double* array;
size_t size;
double capacity;
} DynamicDoubleArray;
void new_DDA(DynamicDoubleArray **dda, size_t cap);
void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data);
void free_DDA(DynamicDoubleArray *dda);
double getDDA_Data(DynamicDoubleArray *dda, size_t pos);
void addDDA_Data(DynamicDoubleArray *dda, double value);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DynamicDoubleArray_H ----- */

View File

@ -1,35 +0,0 @@
/**
* @file DynamicFloatArray.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Dynamic Float Array.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DynamicFloatArray_H
#define _DynamicFloatArray_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
typedef struct DynamicFloatArray
{
float* array;
size_t size;
size_t capacity;
} DynamicFloatArray;
void new_DFA(DynamicFloatArray **dfa, size_t cap);
void convertDFAtoFloats(DynamicFloatArray *dfa, float **data);
void free_DFA(DynamicFloatArray *dfa);
float getDFA_Data(DynamicFloatArray *dfa, size_t pos);
void addDFA_Data(DynamicFloatArray *dfa, float value);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DynamicFloatArray_H ----- */

View File

@ -1,50 +0,0 @@
/**
* @file MultiLevelCacheTable.h
* @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
* @date Jan, 2019
* @brief Header file.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _MULTILEVELCACHETABLE_H
#define _MULTILEVELCACHETABLE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <memory.h>
#include <stdlib.h>
#include "stdio.h"
typedef struct SubLevelTable{
uint32_t baseIndex;
uint32_t topIndex;
uint32_t* table;
uint8_t expoIndex;
} SubLevelTable;
typedef struct TopLevelTable{
uint8_t bits;
uint8_t baseIndex;
uint8_t topIndex;
struct SubLevelTable* subTables;
float bottomBoundary;
float topBoundary;
} TopLevelTable;
uint8_t MLCT_GetExpoIndex(float value);
uint8_t MLCT_GetRequiredBits(float precision);
uint32_t MLCT_GetMantiIndex(float value, int bits);
float MLTC_RebuildFloat(uint8_t expo, uint32_t manti, int bits);
void MultiLevelCacheTableBuild(struct TopLevelTable* topTable, float* precisionTable, int count, float precision);
uint32_t MultiLevelCacheTableGetIndex(float value, struct TopLevelTable* topLevelTable);
void MultiLevelCacheTableFree(struct TopLevelTable* table);
#ifdef __cplusplus
}
#endif
#endif //_MULTILEVELCACHETABLE_H

View File

@ -1,54 +0,0 @@
/**
* @file MultiLevelCacheTableWideInterval.h
* @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
* @date Jan, 2019
* @brief Header file for MultiLevelCacheTableWideInterval.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _MULTILEVELCACHETABLEWIDEINTERVAL_H
#define _MULTILEVELCACHETABLEWIDEINTERVAL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <memory.h>
#include <stdlib.h>
#include "stdio.h"
typedef struct SubLevelTableWideInterval{
uint64_t baseIndex;
uint64_t topIndex;
uint16_t* table;
uint16_t expoIndex;
} SubLevelTableWideInterval;
typedef struct TopLevelTableWideInterval{
uint16_t bits;
uint16_t baseIndex;
uint16_t topIndex;
struct SubLevelTableWideInterval* subTables;
double bottomBoundary;
double topBoundary;
} TopLevelTableWideInterval;
void freeTopLevelTableWideInterval(struct TopLevelTableWideInterval* topTable);
uint16_t MLCTWI_GetExpoIndex(double value);
uint16_t MLCTWI_GetRequiredBits(double precision);
uint64_t MLCTWI_GetMantiIndex(double value, int bits);
double MLTCWI_RebuildDouble(uint16_t expo, uint64_t manti, int bits);
void MultiLevelCacheTableWideIntervalBuild(struct TopLevelTableWideInterval* topTable, double* precisionTable, int count, double precision, int plus_bits);
uint32_t MultiLevelCacheTableWideIntervalGetIndex(double value, struct TopLevelTableWideInterval* topLevelTable);
void MultiLevelCacheTableWideIntervalFree(struct TopLevelTableWideInterval* table);
#ifdef __cplusplus
}
#endif
#endif //_MULTILEVELCACHETABLEWIDEINTERVAL_H

View File

@ -18,19 +18,12 @@ extern "C" {
#include <stdint.h>
//TypeManager.c
size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArrayLength, unsigned char **result);
size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result);
void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);
size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result);
void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);
void convertByteArray2IntArray_fast_3b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);
int getLeftMovingSteps(size_t k, unsigned char resiBitLength);
size_t convertIntArray2ByteArray_fast_dynamic(unsigned char* timeStepType, unsigned char resiBitLength, size_t nbEle, unsigned char **bytes);
size_t convertIntArray2ByteArray_fast_dynamic2(unsigned char* timeStepType, unsigned char* resiBitLength, size_t resiBitLengthLength, unsigned char **bytes);
int computeBitNumRequired(size_t dataLength);
void decompressBitArraybySimpleLZ77(int** result, unsigned char* bytes, size_t bytesLength, size_t totalLength, int validLength);
#ifdef __cplusplus
}

View File

@ -1,84 +0,0 @@
/**
* @file VarSet.h
* @author Sheng Di
* @date July, 2016
* @brief Header file for the Variable.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _VarSet_H
#define _VarSet_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
typedef struct sz_multisteps
{
char compressionType;
int predictionMode;
int lastSnapshotStep; //the previous snapshot step
unsigned int currentStep; //current time step of the execution/simulation
//void* ori_data; //original data pointer, which serve as the key for retrieving hist_data
void* hist_data; //historical data in past time steps
} sz_multisteps;
typedef struct SZ_Variable
{
unsigned char var_id;
char* varName;
char compressType; //102 means HZ; 101 means SZ
int dataType; //SZ_FLOAT or SZ_DOUBLE
size_t r5;
size_t r4;
size_t r3;
size_t r2;
size_t r1;
int errBoundMode;
double absErrBound;
double relBoundRatio;
double pwRelBoundRatio;
void* data;
sz_multisteps *multisteps;
unsigned char* compressedBytes;
size_t compressedSize;
struct SZ_Variable* next;
} SZ_Variable;
typedef struct SZ_VarSet
{
unsigned short count;
struct SZ_Variable *header;
struct SZ_Variable *lastVar;
} SZ_VarSet;
void free_Variable_keepOriginalData(SZ_Variable* v);
void free_Variable_keepCompressedBytes(SZ_Variable* v);
void free_Variable_all(SZ_Variable* v);
void SZ_batchAddVar(int var_id, char* varName, int dataType, void* data,
int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio,
size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
int SZ_batchDelVar_vset(SZ_VarSet* vset, char* varName);
int SZ_batchDelVar(char* varName);
int SZ_batchDelVar_ID_vset(SZ_VarSet* vset, int var_id);
int SZ_batchDelVar_ID(int var_id);
SZ_Variable* SZ_searchVar(char* varName);
void* SZ_getVarData(char* varName, size_t *r5, size_t *r4, size_t *r3, size_t *r2, size_t *r1);
void free_VarSet_vset(SZ_VarSet *vset, int mode);
void SZ_freeVarSet(int mode);
void free_multisteps(sz_multisteps* multisteps);
int checkVarID(unsigned char cur_var_id, unsigned char* var_ids, int var_count);
SZ_Variable* SZ_getVariable(int var_id);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _VarSet_H ----- */

View File

@ -20,11 +20,8 @@ extern "C" {
void updateQuantizationInfo(int quant_intervals);
int SZ_ReadConf(const char* sz_cfgFile);
int SZ_LoadConf(const char* sz_cfgFile);
int checkVersion(unsigned char version);
int computeVersion(int major, int minor, int revision);
int checkVersion2(char* version);
void initSZ_TSC();
unsigned int roundUpToPowerOf2(unsigned int base);
double computeABSErrBoundFromPSNR(double psnr, double threshold, double value_range);
double computeABSErrBoundFromNORM_ERR(double normErr, size_t nbEle);

View File

@ -1,140 +0,0 @@
//CHECK:
//What happens when ECQBits==1, or ECQBits==0 or ECQBits<0?
//Rounding? Scale originalEb by 0.99?
//Possible improvement: Change GAMESS format: {i i i i d} -> {i}{i}{i}{i}{d}
//Possible improvement: Optimize bookkeeping bits
//Possible improvement: Guess the type (C/UC, Sparse/Not)
//Possible improvement: Get rid of writing/reading some of the indexes to in/out buffers
//Possible improvement: Get rid of all debug stuff, including Makefile debug flags
//Possible improvement: Get rid of "compressedBytes"
//Possible improvement: SparseCompressed, ECQBits=2: 1's and -1's can be represented by just 0 and 1, instead 10 and 11.
//Possible improvement: SparseCompressed, ECQBits>2: Again: 1: 10, -1:11, Others: 0XX...XX
//Possible improvement: WriteBitsFast: maybe remove some masks?
//Possible improvement: WriteBitsFast: Get rid of multiple calls!
//Possible improvement: UCSparse: Indexes use 64 bits. It can be lowered to _1DIdxBits
//Possible improvement: Parameters: Smaller data sizes may be possible!
#ifndef PASTRI_H
#define PASTRI_H
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <assert.h> //Just for debugging purposes!
//#define DATASIZE 8 //Bytes per input data point.
//We have only 1 double per data point, so it is 8 bytes.
#define MAX_PS_SIZE 100
#define MAX_BLOCK_SIZE 10000
#define MAX_BUFSIZE 160000 //Should be a multiple of 8
#define D_W 0 //Debug switch: Write (input block)
#define D_R 0 //Debug switch: Read (compressed block)
#define D_G 0 //Debug switch: General
#define D_G2 0 //Debug switch: General 2 (a little more detail)
#define D_C 0 //Debug switch: C
//#define DEBUG 1 //Debug switch
//#define BOOKKEEPINGBITS 0 //Currently unused
//#define BOOKKEEPINGBITS 120 //Includes: mode, indexOffsets, compressedBytes, Pb_, ECQBits_ (8+64+32+8+8)
//BOOKKEEPINGBITS is defined here, because if P & S is going to be used, they appear just after the bookkeeping part.
//This allows us to write P and S directly onto using outBuf.
// IMPORTANT NOTE:
//Read/Write up to 56 bits.
//More than that is not supported!
/********************************************************************/
//Datatype Declarations:
/********************************************************************/
typedef struct pastri_params{
double originalEb; //Error Bound entered by the user
double usedEb; //Error Bound used during compression/deceompression
int numBlocks; //Number of blocks to be compressed
int dataSize; //8(=Double) or 4(=Float)
int bf[4]; //Orbital types (basis function types). Typically in range [0,3]
int idxRange[4]; //Ranges of indexes. idxRange[i]=(bf[i]+1)*(bf[i]+2)/2;
int sbSize; //=idxRange[2]*idxRange[3];
int sbNum; //=idxRange[0]*idxRange[1];
int bSize; //=sbSize*sbNum;
//uint16_t idxOffset[4]; //Index offset values. No longer used.
}pastri_params;
//Block-specific stuff:
typedef struct pastri_blockParams{
uint16_t nonZeros;
//int ECQ0s; //= p->bSize - numOutliers //OR: p->bSize=ECQ0s+ECQ1s+ECQOthers
int ECQ1s;
int ECQOthers;
int numOutliers; //=ECQ1s+ECQOthers
int patternBits;
int scaleBits;
double binSize;
double scalesBinSize;
uint64_t ECQExt;
int ECQBits;
int _1DIdxBits;
}pastri_blockParams;
typedef union u_UI64I64D{
uint64_t ui64;
int64_t i64;
double d;
} u_UI64I64D;
/********************************************************************/
//Function Prototypes:
/********************************************************************/
void SZ_pastriReadParameters(char paramsFilename[512],pastri_params *paramsPtr);
//Read the basic PaSTRI parameters from a file, speficied by paramsFilename.
void SZ_pastriPreprocessParameters(pastri_params *p);
//Using basic PaSTRI parameters, generate the others.
//For example, block and sub-block sizes are generated by using basis function types.
void SZ_pastriCompressBatch(pastri_params *p,unsigned char *originalBuf, unsigned char** compressedBufP,size_t *compressedBytes);
//INPUTS: p, originalBuf
//OUTPUTS: compressedBufP, compressedBytes
//Using the inputs, compressedBufP is allocated and populated by the compressed data. Compressed size is written into compressedBytes.
//Parameters are also stored at the beginning part of the compressedBuf
void SZ_pastriDecompressBatch(unsigned char*compressedBuf, pastri_params *p, unsigned char** decompressedBufP ,size_t *decompressedBytes);
//INPUTS: compressedBuf
//OUTPUTS: p, decompressedBufP, decompressedBytes
//First, parameters are read from compressedBuf and written into p.
//Then, decompressedBufP is allocated and populated by the decompressed data. Decompressed size is written into decompressedBytes.
void SZ_pastriCheckBatch(pastri_params *p,unsigned char*originalBuf,unsigned char*decompressedBuf);
//INPUTS: p, originalBuf, decompressedBuf
//OUTPUTS: None (Just some on-screen messages)
//Compares originalBuf with decompressedBuf. Checks whether the absolute error condition is satisfied or not.
/********************************************************************/
//Other Includes:
/********************************************************************/
#include "pastriGeneral.h" //General tools
#include "pastriD.h" //Compression/Decompression for Double data
#include "pastriF.h" //Compression/Decompression for Float data
#endif

View File

@ -1,911 +0,0 @@
#ifndef PASTRID_H
#define PASTRID_H
static inline int64_t pastri_double_quantize(double x, double binSize){
//Add or sub 0.5, depending on the sign:
x=x/binSize;
u_UI64I64D u1,half;
u1.d=x;
half.d=0.5;
// //printf("pastri_double_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x))));
// //printf("sign(x):0x%lx\n", x);
// //printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));
half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000);
// //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));
return (int64_t)(x + half.d);
}
static inline void pastri_double_PatternMatch(double*data,pastri_params* p,pastri_blockParams* bp,int64_t* patternQ,int64_t *scalesQ, int64_t* ECQ){
//Find the pattern.
//First, find the extremum point:
double absExt=0; //Absolute value of Extremum
int extIdx=-1; //Index of Extremum
bp->nonZeros=0;
int i,sb;
for(i=0;i<p->bSize;i++){
// //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG
if(abs_FastD(data[i])>p->usedEb){
bp->nonZeros++;
////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG
}
if(abs_FastD(data[i])>absExt){
absExt=abs_FastD(data[i]);
extIdx=i;
}
}
int patternIdx; //Starting Index of Pattern
patternIdx=(extIdx/p->sbSize)*p->sbSize;
double patternExt=data[extIdx];
bp->binSize=2*p->usedEb;
////if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG
////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG
////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize) );} }//DEBUG
//int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement!
for(i=0;i<p->sbSize;i++){
patternQ[i]=pastri_double_quantize(data[patternIdx+i],bp->binSize);
//if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}
}
bp->patternBits=bitsNeeded_double((abs_FastD(patternExt)/bp->binSize)+1)+1;
bp->scaleBits=bp->patternBits;
bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->scaleBits-1))-1);
////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG
////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG
//if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG
//Calculate Scales.
//The index part of the input buffer will be reused to hold Scale, Pattern, etc. values.
int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum.
//int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement!
int patternExtZero=(patternExt==0);
////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG
for(sb=0;sb<p->sbNum;sb++){
//scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt;
//scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt;
//assert(scales[sb]<=1);
scalesQ[sb]=pastri_double_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize);
//if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}
}
////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG
//int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it.
//uint64_t wVal;
bp->ECQExt=0;
int _1DIdx;
bp->ECQ1s=0;
bp->ECQOthers=0;
double PS_binSize=bp->scalesBinSize*bp->binSize;
for(sb=0;sb<p->sbNum;sb++){
for(i=0;i<p->sbSize;i++){
_1DIdx=sb*p->sbSize+i;
ECQ[_1DIdx]=pastri_double_quantize( (scalesQ[sb]*patternQ[i]*PS_binSize-data[_1DIdx]),bp->binSize );
double absECQ=abs_FastD(ECQ[_1DIdx]);
if(absECQ > bp->ECQExt)
bp->ECQExt=absECQ;
////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG
switch (ECQ[_1DIdx]){
case 0:
//ECQ0s++; //Currently not needed
break;
case 1:
bp->ECQ1s++;
break;
case -1:
bp->ECQ1s++;
break;
default:
bp->ECQOthers++;
break;
}
}
}
/*
//DEBUG: Self-check. Remove this later.
for(sb=0;sb<p->sbNum;sb++){
for(i=0;i<p->sbSize;i++){
_1DIdx=sb*p->sbSize+i;
double decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize;
if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){
//printf("p->usedEb=%.6e\n",p->usedEb);
//printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));
assert(0);
}
}
}
*/
}
static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ,pastri_params *p,pastri_blockParams* bp,unsigned char* outBuf,int *numOutBytes){
bp->ECQBits=bitsNeeded_UI64(bp->ECQExt)+1;
bp->_1DIdxBits=bitsNeeded_UI64(p->bSize);
//(*numOutBytes)=0;
int i;
//Encode: 3 options:
//Compressed, Sparse ECQ
//Compressed, Non-Sparse ECQ
//Uncompressed, Sparse Data
//Uncompressed, Non-spsarse Data
unsigned int UCSparseBits; //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, nonZeros, {indexes, data}
unsigned int UCNonSparseBits; //Uncompressed, NonSparse bits. Includes: mode, data
unsigned int CSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
unsigned int CNonSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
//int BOOKKEEPINGBITS=120; //Includes: mode, compressedBytes, patternBits, ECQBits (8+64+32+8+8) //Moved to much earlier!
//Consider: ECQ0s, ECQ1s, ECQOthers. Number of following values in ECQ: {0}, {1,-1}, { val<=-2, val>=2}
//ECQ0s is actually not needed, but others are needed.
UCSparseBits = p->dataSize*(1 + 2 + bp->nonZeros*16); //64 bits for 4 indexes, 64 bit for data.
UCNonSparseBits = p->dataSize*(1 + p->bSize*8);
bp->numOutliers=bp->ECQ1s+bp->ECQOthers;
if(bp->ECQBits==2){
CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(1+bp->_1DIdxBits);
CNonSparseBits = p->dataSize*(1+4+1+1) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s ; //Or: ECQ0s+ECQ1s*2;
}else{ //ECQBits>2
CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(2+bp->_1DIdxBits) + bp->ECQOthers*(1+bp->_1DIdxBits+bp->ECQBits);
//CNonSparseBits = 8+32+8+8+ patternBits*p->sbSize + scaleBits*p->sbNum + p->bSize + ECQ0s + ECQ1s*3 + ECQOthers*(2+ECQBits);
CNonSparseBits = p->dataSize*(1+4+1+1)+ bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s*2 + bp->ECQOthers*(1+bp->ECQBits);
}
int UCSparseBytes=(UCSparseBits+7)/8;
int UCNonSparseBytes=(UCNonSparseBits+7)/8;
int CSparseBytes=(CSparseBits+7)/8;
int CNonSparseBytes=(CNonSparseBits+7)/8;
uint64_t bitPos=0;
uint64_t bytePos=0;
int i0,i1,i2,i3;
int _1DIdx;
//*(uint16_t*)(&outBuf[1])=p->idxOffset[0];
//*(uint16_t*)(&outBuf[3])=p->idxOffset[1];
//*(uint16_t*)(&outBuf[5])=p->idxOffset[2];
//*(uint16_t*)(&outBuf[7])=p->idxOffset[3];
//if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG
//if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG
//****************************************************************************************
//if(0){ //DEBUG
//W:UCSparse
if((UCSparseBytes<UCNonSparseBytes) && (UCSparseBytes<CSparseBytes) && (UCSparseBytes<CNonSparseBytes) ){
//Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data
*numOutBytes=UCSparseBytes;
//if(D_G){printf("UCSparse\n");} //DEBUG
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
outBuf[0]=0; //mode
//*(uint16_t*)(&outBuf[9])=nonZeros;
//bytePos=11;//0:mode, 1-8:indexOffsets 9-10:NonZeros. So start from 11.
*(uint16_t*)(&outBuf[1])=bp->nonZeros;
bytePos=3;//0:mode, 2-3:NonZeros. So start from 3.
for(i0=0;i0<p->idxRange[0];i0++)
for(i1=0;i1<p->idxRange[1];i1++)
for(i2=0;i2<p->idxRange[2];i2++)
for(i3=0;i3<p->idxRange[3];i3++){
_1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
if(abs_FastD(data[_1DIdx])>p->usedEb){
//*(uint16_t*)(&outBuf[bytePos])=i0+1+p->idxOffset[0];
*(uint16_t*)(&outBuf[bytePos])=i0;
bytePos+=2;
//*(uint16_t*)(&outBuf[bytePos])=i1+1+p->idxOffset[1];
*(uint16_t*)(&outBuf[bytePos])=i1;
bytePos+=2;
//*(uint16_t*)(&outBuf[bytePos])=i2+1+p->idxOffset[2];
*(uint16_t*)(&outBuf[bytePos])=i2;
bytePos+=2;
//*(uint16_t*)(&outBuf[bytePos])=i3+1+p->idxOffset[3];
*(uint16_t*)(&outBuf[bytePos])=i3;
bytePos+=2;
*(double*)(&outBuf[bytePos])=data[_1DIdx];
bytePos+=p->dataSize;
}
}
//if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG
//****************************************************************************************
//}else if(0){ //DEBUG
//W:UCNonSparse
}else if((UCNonSparseBytes<UCSparseBytes) && (UCNonSparseBytes<CSparseBytes) && (UCNonSparseBytes<CNonSparseBytes) ){
//Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data
*numOutBytes=UCNonSparseBytes;
//if(D_G){printf("UCNonSparse\n");} //DEBUG
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
outBuf[0]=1; //mode
//memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9);
memcpy(&outBuf[1], data, p->bSize*p->dataSize);
//if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG
/*
for(i=0;i<UCNonSparseBytes-17;i++){
//printf("%d ",inBuf[p->bSize*8+i]);
}
//printf("\n");
for(i=0;i<UCNonSparseBytes-17;i++){
//printf("%d ",outBuf[17+i]);
}
//printf("\n");
*/
//****************************************************************************************
//}else if(1){ //DEBUG
//W:CSparse
}else if((CSparseBytes<UCNonSparseBytes) && (CSparseBytes<UCSparseBytes) && (CSparseBytes<CNonSparseBytes) ){
//Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
*numOutBytes=CSparseBytes;
//if(D_G){printf("CSparse\n");} //DEBUG
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
outBuf[0]=2; //mode
////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes.
//outBuf[13]=patternBits;
//outBuf[14]=ECQBits;
////Currently, we are at the end of 15th byte.
//*(uint16_t*)(&outBuf[15])=numOutliers;
//bitPos=17*8; //Currently, we are at the end of 17th byte.
//outBuf bytes [1:4] are reserved for compressedBytes.
outBuf[5]=bp->patternBits;
outBuf[6]=bp->ECQBits;
//Currently, we are at the end of 7th byte.
*(uint16_t*)(&outBuf[7])=bp->numOutliers;
//Now, we are at the end of 9th byte.
bitPos=9*8;
////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
for(i=0;i<p->sbSize;i++){
writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
}
////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
for(i=0;i<p->sbNum;i++){
writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
}
////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
switch(bp->ECQBits){
case 2:
for(i=0;i<p->bSize;i++){
switch(ECQ[i]){
case 0:
break;
case 1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,2,0x10);
//writeBits_Fast(outBuf,&bitPos,2,0);//0x00
//writeBits_Fast(outBuf,&bitPos,2,0);//0x00
writeBits_Fast(outBuf,&bitPos,1,0);//0x00
break;
case -1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,2,0x11);
//writeBits_Fast(outBuf,&bitPos,2,1);//0x01
//writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
break;
default:
assert(0);
break;
}
}
break;
default: //ECQBits>2
for(i=0;i<p->bSize;i++){
switch(ECQ[i]){
case 0:
break;
case 1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,3,0);//0x000
//writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
break;
case -1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,3,1);//0x001
//writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
break;
default:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]);
//writeBits_Fast(outBuf,&bitPos,2+ECQBits,(ECQ[i]&((uint64_t)0x00<<ECQBits))|((uint64_t)0x01<<ECQBits));
//writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
break;
}
}
break;
}
////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
//if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
uint32_t bytePos=(bitPos+7)/8;
//*(uint32_t*)(&outBuf[9])=bytePos;
*(uint32_t*)(&outBuf[1])=bytePos;
//if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG
if(D_G){assert(bitPos==CSparseBits);}
//****************************************************************************************
//W:CNonSparse
}else {
//Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
*numOutBytes=CNonSparseBytes;
//if(D_G){printf("CNonSparse\n");} //DEBUG
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
outBuf[0]=3; //mode
////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes.
//outBuf[13]=patternBits;
//outBuf[14]=ECQBits;
//bitPos=15*8; //Currently, we are at the end of 15th byte.
//outBuf bytes [1:4] are reserved for compressedBytes.
outBuf[5]=bp->patternBits;
outBuf[6]=bp->ECQBits;
bitPos=7*8; //Currently, we are at the end of 7th byte.
////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
for(i=0;i<p->sbSize;i++){
writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
}
////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
for(i=0;i<p->sbNum;i++){
writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
}
////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
switch(bp->ECQBits){
case 2:
for(i=0;i<p->bSize;i++){
switch(ECQ[i]){
case 0:
////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,1,1);//0x1
break;
case 1:
////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG
//writeBits_Fast(outBuf,&bitPos,2,0);//0x00
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
break;
case -1:
////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG
//writeBits_Fast(outBuf,&bitPos,2,2); //0x01
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
break;
default:
assert(0);
break;
}
}
break;
default: //ECQBits>2
////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG
for(i=0;i<p->bSize;i++){
////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG
////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG
switch(ECQ[i]){
case 0:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
//temp1=bitPos;
writeBits_Fast(outBuf,&bitPos,1,1); //0x1
//wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1
////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
break;
case 1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG
////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
//temp1=bitPos;
//writeBits_Fast(outBuf,&bitPos,3,0); //0x000
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
//wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000
////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
break;
case -1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG
////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
//temp1=bitPos;
//writeBits_Fast(outBuf,&bitPos,3,8); //0x001
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
//wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001
////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
break;
default:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG
////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
//temp1=bitPos;
//writeBits_Fast(outBuf,&bitPos,2,2); //0x01
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
//wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01
writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
break;
}
}
break;
}
////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
//if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
uint32_t bytePos=(bitPos+7)/8;
//*(uint32_t*)(&outBuf[9])=bytePos;
*(uint32_t*)(&outBuf[1])=bytePos;
//if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG
if(D_G){assert(bitPos==CNonSparseBits);}
}
////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG
}
static inline int pastri_double_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){
pastri_blockParams bp;
//if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG
//if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG
//if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG
//if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG
int64_t patternQ[MAX_PS_SIZE];
int64_t scalesQ[MAX_PS_SIZE];
int64_t ECQ[MAX_BLOCK_SIZE];
double *data;
data=(double*)inBuf;
//STEP 0: PREPROCESSING:
//This step can include flattening the block, determining the period, etc.
//Currently not needed.
//STEP 1: PATTERN MATCH
pastri_double_PatternMatch(data,p,&bp,patternQ,scalesQ,ECQ);
//STEP 2: ENCODING(Include QUANTIZE)
pastri_double_Encode(data,patternQ,scalesQ,ECQ,p,&bp,outBuf,numOutBytes);
return 0;
}
static inline double pastri_double_InverseQuantization(int64_t q, double binSize){
return q*binSize;
}
static inline void pastri_double_PredictData(pastri_params *p,pastri_blockParams *bp,double *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){
int j;
double PS_binSize=bp->scalesBinSize*bp->binSize;
for(j=0;j<p->bSize;j++){
//data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*PS_binSize - ECQ[j]*bp->binSize;
data[j]=pastri_double_InverseQuantization(scalesQ[j/p->sbSize]*patternQ[j%p->sbSize],PS_binSize) - pastri_double_InverseQuantization(ECQ[j],bp->binSize);
}
}
static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pastri_blockParams *bp,unsigned char*outBuf,int *numReadBytes,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){
int j;
bp->_1DIdxBits=bitsNeeded_UI64(p->bSize);
//double *data=(double*)(outBuf+p->bSize*8);
double *data=(double*)(outBuf);
int i0,i1,i2,i3;
//uint16_t *idx0,*idx1,*idx2,*idx3;
int _1DIdx;
int64_t ECQTemp;
uint64_t bytePos=0;
uint64_t bitPos=0;
uint64_t temp,temp2;
//int sb,localIdx;
//idx0=(uint16_t*)(outBuf );
//idx1=(uint16_t*)(outBuf+p->bSize*2);
//idx2=(uint16_t*)(outBuf+p->bSize*4);
//idx3=(uint16_t*)(outBuf+p->bSize*6);
//p->idxOffset[0]=*(uint32_t*)(&inBuf[1]);
//p->idxOffset[1]=*(uint32_t*)(&inBuf[3]);
//p->idxOffset[2]=*(uint32_t*)(&inBuf[5]);
//p->idxOffset[3]=*(uint32_t*)(&inBuf[7]);
/*
for(i0=0;i0<p->idxRange[0];i0++)
for(i1=0;i1<p->idxRange[1];i1++)
for(i2=0;i2<p->idxRange[2];i2++)
for(i3=0;i3<p->idxRange[3];i3++){
//_1DIdx=i0*p->idxRange[1]*p->idxRange[2]*p->idxRange[3]+i1*p->idxRange[2]*p->idxRange[3]+i2*p->idxRange[3]+i3;
_1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
idx0[_1DIdx]=i0+1+p->idxOffset[0];
idx1[_1DIdx]=i1+1+p->idxOffset[1];
idx2[_1DIdx]=i2+1+p->idxOffset[2];
idx3[_1DIdx]=i3+1+p->idxOffset[3];
}
*/
//*numOutBytes=p->bSize*16;
//inBuf[0] is "mode"
switch(inBuf[0]){
//R:UCSparse
case 0:
//if(D_G){printf("\nDC:UCSparse\n");} //DEBUG
//bp->nonZeros=*(uint16_t*)(&inBuf[9]);
//bytePos=11;
bp->nonZeros=*(uint16_t*)(&inBuf[1]);
bytePos=3;
for(j=0;j<p->bSize;j++){
data[j]=0;
}
for(j=0;j<bp->nonZeros;j++){
//i0=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[0]; //i0
i0=*(uint16_t*)(&inBuf[bytePos]); //i0
bytePos+=2;
//i1=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[1]; //i1
i1=*(uint16_t*)(&inBuf[bytePos]); //i1
bytePos+=2;
//i2=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[2]; //i2
i2=*(uint16_t*)(&inBuf[bytePos]); //i2
bytePos+=2;
//i3=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[3]; //i3
i3=*(uint16_t*)(&inBuf[bytePos]); //i3
bytePos+=2;
_1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
data[_1DIdx]=*(double*)(&inBuf[bytePos]);
bytePos+=8;
}
//if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
break;
//R:UCNonSparse
case 1:
//if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG
//memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8);
memcpy(data, &inBuf[1], p->bSize*8);
bytePos=p->bSize*8;
//if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
break;
//R:CSparse
case 2:
//if(D_G){printf("\nDC:CSparse\n");} //DEBUG
//for(j=0;j<p->bSize;j++){
// data[j]=0;
//}
//bp->patternBits=inBuf[13];
//bp->ECQBits=inBuf[14];
bp->patternBits=inBuf[5];
bp->ECQBits=inBuf[6];
//if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
//bp->numOutliers=*(uint16_t*)(&inBuf[15]);
//bitPos=17*8;
bp->numOutliers=*(uint16_t*)(&inBuf[7]);
bitPos=9*8;
//if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG
bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1);
bp->binSize=p->usedEb*2;
//if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
for(j=0;j<p->sbSize;j++){
patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
//if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
}
for(j=0;j<p->sbNum;j++){
scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
//if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
}
/* //Splitting
for(j=0;j<p->bSize;j++){
data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
}
*/
for(j=0;j<p->bSize;j++){
ECQ[j]=0;
}
switch(bp->ECQBits){
case 2:
for(j=0;j<bp->numOutliers;j++){
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
ECQTemp=readBits_I64(inBuf,&bitPos,1);
ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
//continue;
//sb=_1DIdx/p->sbSize;
//localIdx=_1DIdx%p->sbSize;
////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting
ECQ[_1DIdx]=ECQTemp;
////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
}
break;
default: //bp->ECQBits>2
//if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
for(j=0;j<bp->numOutliers;j++){
_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
//sb=_1DIdx/p->sbSize;
//localIdx=_1DIdx%p->sbSize;
temp=readBits_UI64(inBuf,&bitPos,1);
////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG
switch(temp){
case 0: //+-1
ECQTemp=readBits_I64(inBuf,&bitPos,1);
ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
break;
case 1: //Others
ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
break;
//default:
//// printf("ERROR: Bad 2-bit value: 0x%lx",temp);
// assert(0); //AMG
// break;
}
//data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting
ECQ[_1DIdx]=ECQTemp;
////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
}
break;
}
//static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56]
//patternQ=(int64_t*)(inBuf+15);
//scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
bytePos=(bitPos+7)/8;
//if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
//STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
pastri_double_PredictData(p,bp,data,patternQ,scalesQ,ECQ);
break;
//R:CNonSparse
case 3:
//if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG
//for(j=0;j<p->bSize;j++){
// data[j]=0;
//}
//bp->patternBits=inBuf[13];
//bp->ECQBits=inBuf[14];
bp->patternBits=inBuf[5];
bp->ECQBits=inBuf[6];
//if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
//bitPos=15*8;
bitPos=7*8;
bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1);
bp->binSize=p->usedEb*2;
//if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
for(j=0;j<p->sbSize;j++){
patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
//if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
}
for(j=0;j<p->sbNum;j++){
scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
//if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
}
/* //Splitting
for(j=0;j<p->bSize;j++){
data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}
}
*/
switch(bp->ECQBits){
case 2:
for(j=0;j<p->bSize;j++){
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
//_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
temp=readBits_UI64(inBuf,&bitPos,1);
switch(temp){
case 0:
ECQTemp=readBits_I64(inBuf,&bitPos,1);
ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
break;
case 1:
ECQTemp=0;
break;
default:
assert(0);
break;
}
////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
//continue;
//sb=_1DIdx/p->sbSize;
//localIdx=_1DIdx%p->sbSize;
//data[j]-=ECQTemp*bp->binSize; //Splitting
ECQ[j]=ECQTemp;
////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
}
break;
default: //bp->ECQBits>2
////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);
for(j=0;j<p->bSize;j++){
////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
//_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
temp=readBits_UI64(inBuf,&bitPos,1);
////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
switch(temp){
case 0:
////if(DEBUG)printf("Read:0");
temp2=readBits_UI64(inBuf,&bitPos,1);
switch(temp2){
case 0:
////if(DEBUG)printf("0");
ECQTemp=readBits_I64(inBuf,&bitPos,1);
////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);
ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
break;
case 1:
////if(DEBUG)printf("1\n");
ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
break;
default:
assert(0);
break;
}
break;
case 1:
////if(DEBUG)printf("Read:1\n");
ECQTemp=0;
////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
break;
default:
assert(0);
break;
}
////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
//continue;
//sb=_1DIdx/p->sbSize;
//localIdx=_1DIdx%p->sbSize;
//data[j]-=ECQTemp*bp->binSize; //Splitting
ECQ[j]=ECQTemp;
////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG
}
break;
}
//static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56]
//patternQ=(int64_t*)(inBuf+15);
//scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
bytePos=(bitPos+7)/8;
//if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
//STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
pastri_double_PredictData(p,bp,data,patternQ,scalesQ,ECQ);
break;
default:
assert(0);
break;
}
(*numReadBytes)=bytePos;
}
static inline void pastri_double_Decompress(unsigned char*inBuf,int dataSize,pastri_params *p,unsigned char*outBuf,int *numReadBytes){
int64_t patternQ[MAX_PS_SIZE];
int64_t scalesQ[MAX_PS_SIZE];
int64_t ECQ[MAX_BLOCK_SIZE];
pastri_blockParams bp;
//STEP 1: DECODE (Includes PREDICT DATA(Includes INVERSE QUANTIZATION))
//(Further steps are called inside pastri_double_Decode function)
pastri_double_Decode(inBuf,p,&bp,outBuf,numReadBytes,patternQ,scalesQ,ECQ);
return;
}
//inBuf vs Decompressed
static inline int pastri_double_Check(unsigned char*inBuf,int dataSize,unsigned char*DC,pastri_params *p){
int i;
double *data=(double*)(inBuf);
double *data_dc=(double*)(DC);
//Comparing Indexes:
/*
for(i=0;i<p->bSize;i++){
if(idx0[i]!=idx0_dc[i]){
//printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);
assert(0);
}
if(idx1[i]!=idx1_dc[i]){
//printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);
assert(0);
}
if(idx2[i]!=idx2_dc[i]){
//printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);
assert(0);
}
if(idx3[i]!=idx3_dc[i]){
//printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);
assert(0);
}
}
*/
//Comparing Data:
for(i=0;i<p->bSize;i++){
if(abs_FastD(data[i]-data_dc[i])>p->usedEb){
//printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);
assert(0);
}
}
return 0;
}
#endif

View File

@ -1,911 +0,0 @@
#ifndef PASTRIF_H
#define PASTRIF_H
static inline int64_t pastri_float_quantize(float x, float binSize){
//Add or sub 0.5, depending on the sign:
x=x/binSize;
u_UI64I64D u1,half;
u1.d=x;
half.d=0.5;
////printf("pastri_float_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x))));
////printf("sign(x):0x%lx\n", x);
////printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));
half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000);
////printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));
return (int64_t)(x + half.d);
}
static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_blockParams* bp,int64_t* patternQ,int64_t *scalesQ, int64_t* ECQ){
//Find the pattern.
//First, find the extremum point:
float absExt=0; //Absolute value of Extremum
int extIdx=-1; //Index of Extremum
bp->nonZeros=0;
int i,sb;
for(i=0;i<p->bSize;i++){
////printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG
if(abs_FastD(data[i])>p->usedEb){
bp->nonZeros++;
////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG
}
if(abs_FastD(data[i])>absExt){
absExt=abs_FastD(data[i]);
extIdx=i;
}
}
int patternIdx; //Starting Index of Pattern
patternIdx=(extIdx/p->sbSize)*p->sbSize;
float patternExt=data[extIdx];
bp->binSize=2*p->usedEb;
////if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG
////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG
////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize) );} }//DEBUG
//int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement!
for(i=0;i<p->sbSize;i++){
patternQ[i]=pastri_float_quantize(data[patternIdx+i],bp->binSize);
//if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}
}
bp->patternBits=bitsNeeded_float((abs_FastD(patternExt)/bp->binSize)+1)+1;
bp->scaleBits=bp->patternBits;
bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->scaleBits-1))-1);
////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG
////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG
//if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG
//Calculate Scales.
//The index part of the input buffer will be reused to hold Scale, Pattern, etc. values.
int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum.
//int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement!
int patternExtZero=(patternExt==0);
////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG
for(sb=0;sb<p->sbNum;sb++){
//scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt;
//scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt;
//assert(scales[sb]<=1);
scalesQ[sb]=pastri_float_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize);
//if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}
}
////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG
//int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it.
//uint64_t wVal;
bp->ECQExt=0;
int _1DIdx;
bp->ECQ1s=0;
bp->ECQOthers=0;
float PS_binSize=bp->scalesBinSize*bp->binSize;
for(sb=0;sb<p->sbNum;sb++){
for(i=0;i<p->sbSize;i++){
_1DIdx=sb*p->sbSize+i;
ECQ[_1DIdx]=pastri_float_quantize( (scalesQ[sb]*patternQ[i]*PS_binSize-data[_1DIdx]),bp->binSize );
float absECQ=abs_FastD(ECQ[_1DIdx]);
if(absECQ > bp->ECQExt)
bp->ECQExt=absECQ;
////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG
switch (ECQ[_1DIdx]){
case 0:
//ECQ0s++; //Currently not needed
break;
case 1:
bp->ECQ1s++;
break;
case -1:
bp->ECQ1s++;
break;
default:
bp->ECQOthers++;
break;
}
}
}
/*
//DEBUG: Self-check. Remove this later.
for(sb=0;sb<p->sbNum;sb++){
for(i=0;i<p->sbSize;i++){
_1DIdx=sb*p->sbSize+i;
float decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize;
if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){
//printf("p->usedEb=%.6e\n",p->usedEb);
//printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));
assert(0);
}
}
}
*/
}
static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ,pastri_params *p,pastri_blockParams* bp,unsigned char* outBuf,int *numOutBytes){
bp->ECQBits=bitsNeeded_UI64(bp->ECQExt)+1;
bp->_1DIdxBits=bitsNeeded_UI64(p->bSize);
//(*numOutBytes)=0;
int i;
//Encode: 3 options:
//Compressed, Sparse ECQ
//Compressed, Non-Sparse ECQ
//Uncompressed, Sparse Data
//Uncompressed, Non-spsarse Data
unsigned int UCSparseBits; //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, nonZeros, {indexes, data}
unsigned int UCNonSparseBits; //Uncompressed, NonSparse bits. Includes: mode, data
unsigned int CSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
unsigned int CNonSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
//int BOOKKEEPINGBITS=120; //Includes: mode, compressedBytes, patternBits, ECQBits (8+64+32+8+8) //Moved to much earlier!
//Consider: ECQ0s, ECQ1s, ECQOthers. Number of following values in ECQ: {0}, {1,-1}, { val<=-2, val>=2}
//ECQ0s is actually not needed, but others are needed.
UCSparseBits = p->dataSize*(1 + 2 + bp->nonZeros*16); //64 bits for 4 indexes, 64 bit for data.
UCNonSparseBits = p->dataSize*(1 + p->bSize*8);
bp->numOutliers=bp->ECQ1s+bp->ECQOthers;
if(bp->ECQBits==2){
CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(1+bp->_1DIdxBits);
CNonSparseBits = p->dataSize*(1+4+1+1) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s ; //Or: ECQ0s+ECQ1s*2;
}else{ //ECQBits>2
CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(2+bp->_1DIdxBits) + bp->ECQOthers*(1+bp->_1DIdxBits+bp->ECQBits);
//CNonSparseBits = 8+32+8+8+ patternBits*p->sbSize + scaleBits*p->sbNum + p->bSize + ECQ0s + ECQ1s*3 + ECQOthers*(2+ECQBits);
CNonSparseBits = p->dataSize*(1+4+1+1)+ bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s*2 + bp->ECQOthers*(1+bp->ECQBits);
}
int UCSparseBytes=(UCSparseBits+7)/8;
int UCNonSparseBytes=(UCNonSparseBits+7)/8;
int CSparseBytes=(CSparseBits+7)/8;
int CNonSparseBytes=(CNonSparseBits+7)/8;
uint64_t bitPos=0;
uint64_t bytePos=0;
int i0,i1,i2,i3;
int _1DIdx;
//*(uint16_t*)(&outBuf[1])=p->idxOffset[0];
//*(uint16_t*)(&outBuf[3])=p->idxOffset[1];
//*(uint16_t*)(&outBuf[5])=p->idxOffset[2];
//*(uint16_t*)(&outBuf[7])=p->idxOffset[3];
//if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG
//if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG
//****************************************************************************************
//if(0){ //DEBUG
//W:UCSparse
if((UCSparseBytes<UCNonSparseBytes) && (UCSparseBytes<CSparseBytes) && (UCSparseBytes<CNonSparseBytes) ){
//Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data
*numOutBytes=UCSparseBytes;
//if(D_G){printf("UCSparse\n");} //DEBUG
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
outBuf[0]=0; //mode
//*(uint16_t*)(&outBuf[9])=nonZeros;
//bytePos=11;//0:mode, 1-8:indexOffsets 9-10:NonZeros. So start from 11.
*(uint16_t*)(&outBuf[1])=bp->nonZeros;
bytePos=3;//0:mode, 2-3:NonZeros. So start from 3.
for(i0=0;i0<p->idxRange[0];i0++)
for(i1=0;i1<p->idxRange[1];i1++)
for(i2=0;i2<p->idxRange[2];i2++)
for(i3=0;i3<p->idxRange[3];i3++){
_1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
if(abs_FastD(data[_1DIdx])>p->usedEb){
//*(uint16_t*)(&outBuf[bytePos])=i0+1+p->idxOffset[0];
*(uint16_t*)(&outBuf[bytePos])=i0;
bytePos+=2;
//*(uint16_t*)(&outBuf[bytePos])=i1+1+p->idxOffset[1];
*(uint16_t*)(&outBuf[bytePos])=i1;
bytePos+=2;
//*(uint16_t*)(&outBuf[bytePos])=i2+1+p->idxOffset[2];
*(uint16_t*)(&outBuf[bytePos])=i2;
bytePos+=2;
//*(uint16_t*)(&outBuf[bytePos])=i3+1+p->idxOffset[3];
*(uint16_t*)(&outBuf[bytePos])=i3;
bytePos+=2;
*(float*)(&outBuf[bytePos])=data[_1DIdx];
bytePos+=p->dataSize;
}
}
//if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG
//****************************************************************************************
//}else if(0){ //DEBUG
//W:UCNonSparse
}else if((UCNonSparseBytes<UCSparseBytes) && (UCNonSparseBytes<CSparseBytes) && (UCNonSparseBytes<CNonSparseBytes) ){
//Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data
*numOutBytes=UCNonSparseBytes;
//if(D_G){printf("UCNonSparse\n");} //DEBUG
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
outBuf[0]=1; //mode
//memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9);
memcpy(&outBuf[1], data, p->bSize*p->dataSize);
//if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG
/*
for(i=0;i<UCNonSparseBytes-17;i++){
//printf("%d ",inBuf[p->bSize*8+i]);
}
//printf("\n");
for(i=0;i<UCNonSparseBytes-17;i++){
//printf("%d ",outBuf[17+i]);
}
//printf("\n");
*/
//****************************************************************************************
//}else if(1){ //DEBUG
//W:CSparse
}else if((CSparseBytes<UCNonSparseBytes) && (CSparseBytes<UCSparseBytes) && (CSparseBytes<CNonSparseBytes) ){
//Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
*numOutBytes=CSparseBytes;
//if(D_G){printf("CSparse\n");} //DEBUG
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
outBuf[0]=2; //mode
////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes.
//outBuf[13]=patternBits;
//outBuf[14]=ECQBits;
////Currently, we are at the end of 15th byte.
//*(uint16_t*)(&outBuf[15])=numOutliers;
//bitPos=17*8; //Currently, we are at the end of 17th byte.
//outBuf bytes [1:4] are reserved for compressedBytes.
outBuf[5]=bp->patternBits;
outBuf[6]=bp->ECQBits;
//Currently, we are at the end of 7th byte.
*(uint16_t*)(&outBuf[7])=bp->numOutliers;
//Now, we are at the end of 9th byte.
bitPos=9*8;
////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
for(i=0;i<p->sbSize;i++){
writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
}
////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
for(i=0;i<p->sbNum;i++){
writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
}
////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
switch(bp->ECQBits){
case 2:
for(i=0;i<p->bSize;i++){
switch(ECQ[i]){
case 0:
break;
case 1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,2,0x10);
//writeBits_Fast(outBuf,&bitPos,2,0);//0x00
//writeBits_Fast(outBuf,&bitPos,2,0);//0x00
writeBits_Fast(outBuf,&bitPos,1,0);//0x00
break;
case -1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,2,0x11);
//writeBits_Fast(outBuf,&bitPos,2,1);//0x01
//writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
break;
default:
assert(0);
break;
}
}
break;
default: //ECQBits>2
for(i=0;i<p->bSize;i++){
switch(ECQ[i]){
case 0:
break;
case 1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,3,0);//0x000
//writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
break;
case -1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,3,1);//0x001
//writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
break;
default:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
//writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]);
//writeBits_Fast(outBuf,&bitPos,2+ECQBits,(ECQ[i]&((uint64_t)0x00<<ECQBits))|((uint64_t)0x01<<ECQBits));
//writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
break;
}
}
break;
}
////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
//if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
uint32_t bytePos=(bitPos+7)/8;
//*(uint32_t*)(&outBuf[9])=bytePos;
*(uint32_t*)(&outBuf[1])=bytePos;
//if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG
if(D_G){assert(bitPos==CSparseBits);}
//****************************************************************************************
//W:CNonSparse
}else {
//Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
*numOutBytes=CNonSparseBytes;
//if(D_G){printf("CNonSparse\n");} //DEBUG
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
outBuf[0]=3; //mode
////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes.
//outBuf[13]=patternBits;
//outBuf[14]=ECQBits;
//bitPos=15*8; //Currently, we are at the end of 15th byte.
//outBuf bytes [1:4] are reserved for compressedBytes.
outBuf[5]=bp->patternBits;
outBuf[6]=bp->ECQBits;
bitPos=7*8; //Currently, we are at the end of 7th byte.
////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
for(i=0;i<p->sbSize;i++){
writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
}
////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
for(i=0;i<p->sbNum;i++){
writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
}
////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
switch(bp->ECQBits){
case 2:
for(i=0;i<p->bSize;i++){
switch(ECQ[i]){
case 0:
////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG
writeBits_Fast(outBuf,&bitPos,1,1);//0x1
break;
case 1:
////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG
//writeBits_Fast(outBuf,&bitPos,2,0);//0x00
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
break;
case -1:
////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG
//writeBits_Fast(outBuf,&bitPos,2,2); //0x01
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
break;
default:
assert(0);
break;
}
}
break;
default: //ECQBits>2
////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG
for(i=0;i<p->bSize;i++){
////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG
////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG
switch(ECQ[i]){
case 0:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
//temp1=bitPos;
writeBits_Fast(outBuf,&bitPos,1,1); //0x1
//wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1
////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
break;
case 1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG
////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
//temp1=bitPos;
//writeBits_Fast(outBuf,&bitPos,3,0); //0x000
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
//wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000
////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
break;
case -1:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG
////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
//temp1=bitPos;
//writeBits_Fast(outBuf,&bitPos,3,8); //0x001
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
//wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001
////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
break;
default:
////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG
////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
//temp1=bitPos;
//writeBits_Fast(outBuf,&bitPos,2,2); //0x01
writeBits_Fast(outBuf,&bitPos,1,0);
writeBits_Fast(outBuf,&bitPos,1,1);
//wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01
writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
break;
}
}
break;
}
////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
//if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
uint32_t bytePos=(bitPos+7)/8;
//*(uint32_t*)(&outBuf[9])=bytePos;
*(uint32_t*)(&outBuf[1])=bytePos;
//if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG
if(D_G){assert(bitPos==CNonSparseBits);}
}
////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG
}
static inline int pastri_float_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){
pastri_blockParams bp;
//if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG
//if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG
//if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG
//if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG
int64_t patternQ[MAX_PS_SIZE];
int64_t scalesQ[MAX_PS_SIZE];
int64_t ECQ[MAX_BLOCK_SIZE];
float *data;
data=(float*)inBuf;
//STEP 0: PREPROCESSING:
//This step can include flattening the block, determining the period, etc.
//Currently not needed.
//STEP 1: PATTERN MATCH
pastri_float_PatternMatch(data,p,&bp,patternQ,scalesQ,ECQ);
//STEP 2: ENCODING(Include QUANTIZE)
pastri_float_Encode(data,patternQ,scalesQ,ECQ,p,&bp,outBuf,numOutBytes);
return 0;
}
static inline float pastri_float_InverseQuantization(int64_t q, float binSize){
return q*binSize;
}
static inline void pastri_float_PredictData(pastri_params *p,pastri_blockParams *bp,float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){
int j;
float PS_binSize=bp->scalesBinSize*bp->binSize;
for(j=0;j<p->bSize;j++){
//data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*PS_binSize - ECQ[j]*bp->binSize;
data[j]=pastri_float_InverseQuantization(scalesQ[j/p->sbSize]*patternQ[j%p->sbSize],PS_binSize) - pastri_float_InverseQuantization(ECQ[j],bp->binSize);
}
}
static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,pastri_blockParams *bp,unsigned char*outBuf,int *numReadBytes,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){
int j;
bp->_1DIdxBits=bitsNeeded_UI64(p->bSize);
//float *data=(float*)(outBuf+p->bSize*8);
float *data=(float*)(outBuf);
int i0,i1,i2,i3;
//uint16_t *idx0,*idx1,*idx2,*idx3;
int _1DIdx;
int64_t ECQTemp;
uint64_t bytePos=0;
uint64_t bitPos=0;
uint64_t temp,temp2;
//int sb,localIdx;
//idx0=(uint16_t*)(outBuf );
//idx1=(uint16_t*)(outBuf+p->bSize*2);
//idx2=(uint16_t*)(outBuf+p->bSize*4);
//idx3=(uint16_t*)(outBuf+p->bSize*6);
//p->idxOffset[0]=*(uint32_t*)(&inBuf[1]);
//p->idxOffset[1]=*(uint32_t*)(&inBuf[3]);
//p->idxOffset[2]=*(uint32_t*)(&inBuf[5]);
//p->idxOffset[3]=*(uint32_t*)(&inBuf[7]);
/*
for(i0=0;i0<p->idxRange[0];i0++)
for(i1=0;i1<p->idxRange[1];i1++)
for(i2=0;i2<p->idxRange[2];i2++)
for(i3=0;i3<p->idxRange[3];i3++){
//_1DIdx=i0*p->idxRange[1]*p->idxRange[2]*p->idxRange[3]+i1*p->idxRange[2]*p->idxRange[3]+i2*p->idxRange[3]+i3;
_1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
idx0[_1DIdx]=i0+1+p->idxOffset[0];
idx1[_1DIdx]=i1+1+p->idxOffset[1];
idx2[_1DIdx]=i2+1+p->idxOffset[2];
idx3[_1DIdx]=i3+1+p->idxOffset[3];
}
*/
//*numOutBytes=p->bSize*16;
//inBuf[0] is "mode"
switch(inBuf[0]){
//R:UCSparse
case 0:
//if(D_G){printf("\nDC:UCSparse\n");} //DEBUG
//bp->nonZeros=*(uint16_t*)(&inBuf[9]);
//bytePos=11;
bp->nonZeros=*(uint16_t*)(&inBuf[1]);
bytePos=3;
for(j=0;j<p->bSize;j++){
data[j]=0;
}
for(j=0;j<bp->nonZeros;j++){
//i0=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[0]; //i0
i0=*(uint16_t*)(&inBuf[bytePos]); //i0
bytePos+=2;
//i1=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[1]; //i1
i1=*(uint16_t*)(&inBuf[bytePos]); //i1
bytePos+=2;
//i2=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[2]; //i2
i2=*(uint16_t*)(&inBuf[bytePos]); //i2
bytePos+=2;
//i3=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[3]; //i3
i3=*(uint16_t*)(&inBuf[bytePos]); //i3
bytePos+=2;
_1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3;
data[_1DIdx]=*(float*)(&inBuf[bytePos]);
bytePos+=8;
}
//if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
break;
//R:UCNonSparse
case 1:
//if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG
//memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8);
memcpy(data, &inBuf[1], p->bSize*8);
bytePos=p->bSize*8;
//if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
break;
//R:CSparse
case 2:
//if(D_G){printf("\nDC:CSparse\n");} //DEBUG
//for(j=0;j<p->bSize;j++){
// data[j]=0;
//}
//bp->patternBits=inBuf[13];
//bp->ECQBits=inBuf[14];
bp->patternBits=inBuf[5];
bp->ECQBits=inBuf[6];
//if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
//bp->numOutliers=*(uint16_t*)(&inBuf[15]);
//bitPos=17*8;
bp->numOutliers=*(uint16_t*)(&inBuf[7]);
bitPos=9*8;
//if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG
bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1);
bp->binSize=p->usedEb*2;
//if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
for(j=0;j<p->sbSize;j++){
patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
//if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
}
for(j=0;j<p->sbNum;j++){
scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
//if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
}
/* //Splitting
for(j=0;j<p->bSize;j++){
data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
}
*/
for(j=0;j<p->bSize;j++){
ECQ[j]=0;
}
switch(bp->ECQBits){
case 2:
for(j=0;j<bp->numOutliers;j++){
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
ECQTemp=readBits_I64(inBuf,&bitPos,1);
ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
//continue;
//sb=_1DIdx/p->sbSize;
//localIdx=_1DIdx%p->sbSize;
////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting
ECQ[_1DIdx]=ECQTemp;
////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
}
break;
default: //bp->ECQBits>2
//if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
for(j=0;j<bp->numOutliers;j++){
_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
//sb=_1DIdx/p->sbSize;
//localIdx=_1DIdx%p->sbSize;
temp=readBits_UI64(inBuf,&bitPos,1);
////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG
switch(temp){
case 0: //+-1
ECQTemp=readBits_I64(inBuf,&bitPos,1);
ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
break;
case 1: //Others
ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
break;
//default:
//// printf("ERROR: Bad 2-bit value: 0x%lx",temp);
// assert(0); //AMG
// break;
}
//data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting
ECQ[_1DIdx]=ECQTemp;
////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
}
break;
}
//static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56]
//patternQ=(int64_t*)(inBuf+15);
//scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
bytePos=(bitPos+7)/8;
//if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
//STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ);
break;
//R:CNonSparse
case 3:
//if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG
//for(j=0;j<p->bSize;j++){
// data[j]=0;
//}
//bp->patternBits=inBuf[13];
//bp->ECQBits=inBuf[14];
bp->patternBits=inBuf[5];
bp->ECQBits=inBuf[6];
//if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
//bitPos=15*8;
bitPos=7*8;
bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1);
bp->binSize=p->usedEb*2;
//if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
for(j=0;j<p->sbSize;j++){
patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
//if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
}
for(j=0;j<p->sbNum;j++){
scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
//if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
}
/* //Splitting
for(j=0;j<p->bSize;j++){
data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}
}
*/
switch(bp->ECQBits){
case 2:
for(j=0;j<p->bSize;j++){
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
//_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
temp=readBits_UI64(inBuf,&bitPos,1);
switch(temp){
case 0:
ECQTemp=readBits_I64(inBuf,&bitPos,1);
ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
break;
case 1:
ECQTemp=0;
break;
default:
assert(0);
break;
}
////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
//continue;
//sb=_1DIdx/p->sbSize;
//localIdx=_1DIdx%p->sbSize;
//data[j]-=ECQTemp*bp->binSize; //Splitting
ECQ[j]=ECQTemp;
////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
}
break;
default: //bp->ECQBits>2
////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);
for(j=0;j<p->bSize;j++){
////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
//_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
temp=readBits_UI64(inBuf,&bitPos,1);
////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
switch(temp){
case 0:
////if(DEBUG)printf("Read:0");
temp2=readBits_UI64(inBuf,&bitPos,1);
switch(temp2){
case 0:
////if(DEBUG)printf("0");
ECQTemp=readBits_I64(inBuf,&bitPos,1);
////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);
ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
break;
case 1:
////if(DEBUG)printf("1\n");
ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
break;
default:
assert(0);
break;
}
break;
case 1:
////if(DEBUG)printf("Read:1\n");
ECQTemp=0;
////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
break;
default:
assert(0);
break;
}
////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
//continue;
//sb=_1DIdx/p->sbSize;
//localIdx=_1DIdx%p->sbSize;
//data[j]-=ECQTemp*bp->binSize; //Splitting
ECQ[j]=ECQTemp;
////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG
}
break;
}
//static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56]
//patternQ=(int64_t*)(inBuf+15);
//scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
bytePos=(bitPos+7)/8;
//if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
//STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ);
break;
default:
assert(0);
break;
}
(*numReadBytes)=bytePos;
}
static inline void pastri_float_Decompress(unsigned char*inBuf,int dataSize,pastri_params *p,unsigned char*outBuf,int *numReadBytes){
int64_t patternQ[MAX_PS_SIZE];
int64_t scalesQ[MAX_PS_SIZE];
int64_t ECQ[MAX_BLOCK_SIZE];
pastri_blockParams bp;
//STEP 1: DECODE (Includes PREDICT DATA(Includes INVERSE QUANTIZATION))
//(Further steps are called inside pastri_float_Decode function)
pastri_float_Decode(inBuf,p,&bp,outBuf,numReadBytes,patternQ,scalesQ,ECQ);
return;
}
//inBuf vs Decompressed
static inline int pastri_float_Check(unsigned char*inBuf,int dataSize,unsigned char*DC,pastri_params *p){
int i;
float *data=(float*)(inBuf);
float *data_dc=(float*)(DC);
//Comparing Indexes:
/*
for(i=0;i<p->bSize;i++){
if(idx0[i]!=idx0_dc[i]){
//printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);
assert(0);
}
if(idx1[i]!=idx1_dc[i]){
//printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);
assert(0);
}
if(idx2[i]!=idx2_dc[i]){
//printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);
assert(0);
}
if(idx3[i]!=idx3_dc[i]){
//printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);
assert(0);
}
}
*/
//Comparing Data:
for(i=0;i<p->bSize;i++){
if(abs_FastD(data[i]-data_dc[i])>p->usedEb){
//printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);
assert(0);
}
}
return 0;
}
#endif

View File

@ -1,205 +0,0 @@
#ifndef PASTRIGENERAL_H
#define PASTRIGENERAL_H
static inline double abs_FastD(double x){
u_UI64I64D u1;
u1.d=x;
//(*((uint64_t *)(&x)))&=(int64_t)0x7FFFFFFFFFFFFFFF;
u1.ui64&=(int64_t)0x7FFFFFFFFFFFFFFF;
return u1.d;
}
static inline int64_t abs_FastI64(int64_t x){
return (x^((x&(int64_t)0x8000000000000000)>>63))+((x&(int64_t)0x8000000000000000)!=0);
}
/*
int abs(int x) {
int mask = (x >> (sizeof(int) * CHAR_BIT - 1));
return (x + mask) ^ mask;
}
*/
//Returns the min. bits needed to represent x.
//Same as: ceil(log2(abs(x)))
//Actually to be completely safe, it correspond to: ceil(log2(abs(i)+1))+0.1
//+0.1 was for fixing rounding errors
//REMEMBER: To represent the whole range [-x:x], the number of bits required is bitsNeeded(x)+1
static inline int bitsNeeded_double(double x){
u_UI64I64D u1;
u1.d=x;
return (((u1.ui64<<1)>>53)-1022) & (((x!=0)<<31)>>31);
}
//Returns the min. bits needed to represent x.
//Same as: ceil(log2(abs(x)))
//NEEDS OPTIMIZATION!
static inline int bitsNeeded_float(float x){
u_UI64I64D u1;
u1.d=x; //Casting to Double!
return (((u1.ui64<<1)>>53)-1022) & (((x!=0)<<31)>>31);
}
static inline int bitsNeeded_UI64(uint64_t x){
int shift;
int res=0;
//Get the absolute value of x:
//x=(x^((x&(int64_t)0x8000000000000000)>>63))+((x&(int64_t)0x8000000000000000)!=0);
//x=abs_FastI64(x);
//printf("%d\n",(x&(uint64_t)0xFFFFFFFF00000000)!=0);
shift=(((x&(uint64_t)0xFFFFFFFF00000000)!=0)*32);
x>>=shift;
res+=shift;
//printf("%d\n",(x&(uint64_t)0x00000000FFFF0000)!=0);
shift=(((x&(uint64_t)0x00000000FFFF0000)!=0)*16);
x>>=shift;
res+=shift;
//printf("%d\n",(x&(uint64_t)0x000000000000FF00)!=0);
shift=(((x&(uint64_t)0x000000000000FF00)!=0)*8);
x>>=shift;
res+=shift;
//printf("%d\n",(x&(uint64_t)0x00000000000000F0)!=0);
shift=(((x&(uint64_t)0x00000000000000F0)!=0)*4);
x>>=shift;
res+=shift;
//printf("%d\n",(x&(uint64_t)0x000000000000000C)!=0);
shift=(((x&(uint64_t)0x000000000000000C)!=0)*2);
x>>=shift;
res+=shift;
//printf("%d\n",(x&(uint64_t)0x0000000000000002)!=0);
shift=((x&(uint64_t)0x0000000000000002)!=0);
x>>=shift;
res+=shift;
//printf("%d\n",(x&(uint64_t)0x0000000000000001)!=0);
shift=((x&(uint64_t)0x0000000000000001)!=0);
x>>=shift;
res+=shift;
//printf("BITS NEEDED: %d\n",res);
return res;
}
static inline int bitsNeeded_I64(int64_t x){
uint64_t ux;
ux=abs_FastI64(x);
return bitsNeeded_UI64(ux);
}
//Implementations(They are inline, so they should be in this header file)
static inline int myEndianType(){ //Should work for most cases. May not work at mixed endian systems.
uint64_t n=1;
if (*(unsigned char*)&n == 1){
//cout<<"Little-Endian"<<endl;
return 0; //0 for little endian
}
else{
//cout<<"Big-Endian"<<endl;
return 1; //1 for big endian
}
}
static inline void flipBytes_UI64(uint64_t *dataPtr){
unsigned char*tempA;
char temp8b;
tempA=(unsigned char*)dataPtr;
temp8b=tempA[7];
tempA[7]=tempA[0];
tempA[0]=temp8b;
temp8b=tempA[6];
tempA[6]=tempA[1];
tempA[1]=temp8b;
temp8b=tempA[5];
tempA[5]=tempA[2];
tempA[2]=temp8b;
temp8b=tempA[4];
tempA[4]=tempA[3];
tempA[3]=temp8b;
return;
}
//WARNING: readBits works properly only on Little Endian machines! (For Big Endians, some modifications are needed)
static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,char numBits){ // numBits must be in range [0:56]
uint64_t mask = ((uint64_t)0x0000000000000001<<numBits)-1;
//cout<<"bitPos:"<<(*bitPosPtr)<<"\tbitPos>>3:"<<(*bitPosPtr>>3)<<endl;
uint64_t temp64b = *(uint64_t*)(buffer + ( *bitPosPtr >> 3));
//NOTE: bitPos>>3 is the same as bitPos/8
temp64b >>= (*bitPosPtr) & (uint64_t)0x0000000000000007;
//cout<<endl;
//cout<<"bitpos>>3:"<<(bitPos>>3)<<" bitPos&0x7:"<<(bitPos & 0x00000007)<<" bitPos%8:"<<(bitPos%8)<<endl;
//cout<<"Read:"<<(temp64b & mask)<<" temp64b:"<<temp64b<<" Mask:"<<mask<<" numBits:"<<numBits<<endl;
(*bitPosPtr) += numBits;
return (temp64b & mask);
}
static inline int64_t readBits_I64(unsigned char* buffer,uint64_t *bitPosPtr,char numBits){ // numBits must be in range [0:56]
int64_t val;
val=readBits_UI64(buffer,bitPosPtr,numBits);//Read value
int64_t shiftAmount=64-numBits;
val=(val<<shiftAmount)>>shiftAmount;//Sign correction
return val;
}
//WARNING: readBits_EndianSafe is not tested on Big-Endian machines
static inline uint64_t readBits_EndianSafe(unsigned char* buffer,uint64_t *bitPosPtr,char numBits){ // numBits must be in range [0:56]
uint64_t mask = ((uint64_t)0x0000000000000001<<numBits)-1;
uint64_t temp64b = *(uint64_t*)(buffer + ((*bitPosPtr)>>3));
//NOTE: (*bitPosPtr)>>3 is the same as (*bitPosPtr)/8
if(myEndianType())
flipBytes_UI64(&temp64b);
temp64b >>= (*bitPosPtr) & (uint64_t)0x0000000000000007;
(*bitPosPtr) += numBits;
return temp64b & mask;
}
//WARNING: writeBits_Fast works properly only on Little Endian machines! (For Big Endians, some modifications are needed)
//The buffer should be initialized as 0's for this to work!
//Also, the range of data is not checked!(If data exceeds numBits, it may be cause problems)
static inline void writeBits_Fast(unsigned char* buffer,uint64_t *bitPosPtr,char numBits,int64_t data){
//if(DEBUG){printf("writeBits_Fast: data:0x%lx %ld\n",data,data);} //DEBUG
//if(DEBUG){printf("writeBits_Fast: numBits:0x%lx %ld\n",numBits,numBits);} //DEBUG
uint64_t mask = ((uint64_t)0x0000000000000001<<numBits)-1;
//if(DEBUG){printf("writeBits_Fast: mask:0x%lx %ld\n",mask,mask);} //DEBUG
//if(DEBUG){printf("writeBits_Fast: data&mask:0x%lx %ld\n",((*(uint64_t*)&data)&mask),((*(uint64_t*)&data)&mask));} //DEBUG
//if(DEBUG){printf("writeBits_Fast: buffer_O:0x%lx\n",*(uint64_t*)(buffer + ((*bitPosPtr)>>3)));} //DEBUG
*(uint64_t*)(buffer + ((*bitPosPtr)>>3)) |= ((*(uint64_t*)&data)&mask) << ((*bitPosPtr) & (uint64_t)0x0000000000000007);
//if(DEBUG){printf("writeBits_Fast: buffer_N:0x%lx\n",*(uint64_t*)(buffer + ((*bitPosPtr)>>3)));} //DEBUG
(*bitPosPtr) += numBits;
}
//WARNING: writeBits_EndianSafe is not tested on Big-Endian machines
static inline void writeBits_EndianSafe(unsigned char* buffer,uint64_t *bitPosPtr,char numBits,uint64_t data){
uint64_t mask = ((uint64_t)0x0000000000000001<<numBits)-1;
data=data&mask;
uint64_t temp64b_inBuffer=*(uint64_t*)(buffer + ((*bitPosPtr)>>3));
uint64_t temp64b_outBuffer=data << ((*bitPosPtr) & (uint64_t)0x0000000000000007);
if(myEndianType()){
flipBytes_UI64(&temp64b_inBuffer);
}
temp64b_outBuffer |= temp64b_inBuffer;
if(myEndianType()){
flipBytes_UI64(&temp64b_outBuffer);
}
*(uint64_t*)(buffer + ((*bitPosPtr)>>3))=temp64b_outBuffer; // "|=" may also work
(*bitPosPtr) += numBits;
}
#endif

View File

@ -1,89 +0,0 @@
/**
* @file io.h
* @author Sheng Di
* @date April, 2015
* @brief Header file for the whole io interface.
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _IO_H
#define _IO_H
#include <stdio.h>
#include <stdint.h>
#ifdef _WIN32
#define PATH_SEPARATOR ';'
#else
#define PATH_SEPARATOR ':'
#endif
#ifdef __cplusplus
extern "C" {
#endif
int checkFileExistance(char* filePath);
float** create2DArray_float(size_t m, size_t n);
void free2DArray_float(float** data, size_t m);
float*** create3DArray_float(size_t p, size_t m, size_t n);
void free3DArray_float(float*** data, size_t p, size_t m);
double** create2DArray_double(size_t m, size_t n);
void free2DArray_double(double** data, size_t m);
double*** create3DArray_double(size_t p, size_t m, size_t n);
void free3DArray_double(double*** data, size_t p, size_t m);
size_t checkFileSize(char *srcFilePath, int *status);
unsigned char *readByteData(char *srcFilePath, size_t *byteLength, int *status);
double *readDoubleData(char *srcFilePath, size_t *nbEle, int *status);
int8_t *readInt8Data(char *srcFilePath, size_t *nbEle, int *status);
int16_t *readInt16Data(char *srcFilePath, size_t *nbEle, int *status);
uint16_t *readUInt16Data(char *srcFilePath, size_t *nbEle, int *status);
int32_t *readInt32Data(char *srcFilePath, size_t *nbEle, int *status);
uint32_t *readUInt32Data(char *srcFilePath, size_t *nbEle, int *status);
int64_t *readInt64Data(char *srcFilePath, size_t *nbEle, int *status);
uint64_t *readUInt64Data(char *srcFilePath, size_t *nbEle, int *status);
float *readFloatData(char *srcFilePath, size_t *nbEle, int *status);
unsigned short* readShortData(char *srcFilePath, size_t *dataLength, int *status);
double *readDoubleData_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
int8_t *readInt8Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
int16_t *readInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
uint16_t *readUInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
int32_t *readInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
uint32_t *readUInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
int64_t *readInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
uint64_t *readUInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
float *readFloatData_systemEndian(char *srcFilePath, size_t *nbEle, int *status);
void writeByteData(unsigned char *bytes, size_t byteLength, char *tgtFilePath, int *status);
void writeDoubleData(double *data, size_t nbEle, char *tgtFilePath, int *status);
void writeFloatData(float *data, size_t nbEle, char *tgtFilePath, int *status);
void writeDataSZ(void *data, int dataType, size_t nbEle, char *tgtFilePath, int *status);
void writeFloatData_inBytes(float *data, size_t nbEle, char* tgtFilePath, int *status);
void writeDoubleData_inBytes(double *data, size_t nbEle, char* tgtFilePath, int *status);
void writeShortData_inBytes(short *states, size_t stateLength, char *tgtFilePath, int *status);
void writeUShortData_inBytes(unsigned short *states, size_t stateLength, char *tgtFilePath, int *status);
void writeIntData_inBytes(int *states, size_t stateLength, char *tgtFilePath, int *status);
void writeUIntData_inBytes(unsigned int *states, size_t stateLength, char *tgtFilePath, int *status);
void writeLongData_inBytes(int64_t *states, size_t stateLength, char *tgtFilePath, int *status);
void writeULongData_inBytes(uint64_t *states, size_t stateLength, char *tgtFilePath, int *status);
void writeStrings(int nbStr, char *str[], char *tgtFilePath, int *status);
//void convertToPFM_float(float *data, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, int endianType, char *tgtFilePath, int *status);
void checkfilesizec_(char *srcFilePath, int *len, size_t *filesize);
void readbytefile_(char *srcFilePath, int *len, unsigned char *bytes, size_t *byteLength);
void readdoublefile_(char *srcFilePath, int *len, double *data, size_t *nbEle);
void readfloatfile_(char *srcFilePath, int *len, float *data, size_t *nbEle);
void writebytefile_(unsigned char *bytes, size_t *byteLength, char *tgtFilePath, int *len);
void writedoublefile_(double *data, size_t *nbEle, char *tgtFilePath, int *len);
void writefloatfile_(float *data, size_t *nbEle, char *tgtFilePath, int *len);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _IO_H ----- */

View File

@ -18,7 +18,6 @@
#include "CompressElement.h"
#include "DynamicByteArray.h"
#include "DynamicIntArray.h"
#include "VarSet.h"
#include "TightDataPointStorageD.h"
#include "TightDataPointStorageF.h"
#include "conf.h"
@ -29,15 +28,8 @@
#include "sz_double.h"
#include "szd_float.h"
#include "szd_double.h"
#include "sz_opencl.h"
#include "callZlib.h"
#include "rw.h"
#include "pastri.h"
#include "utility.h"
#include "CacheTable.h"
#include "MultiLevelCacheTable.h"
#include "MultiLevelCacheTableWideInterval.h"
#include "sz_stats.h"
#ifdef _WIN32
#define PATH_SEPARATOR ';'
@ -172,15 +164,6 @@ extern sz_params *confparams_cpr;
extern sz_params *confparams_dec;
extern sz_exedata *exe_params;
//------------------------------------------------
extern SZ_VarSet* sz_varset;
extern sz_multisteps *multisteps; //compression based on multiple time steps (time-dimension based compression)
extern sz_tsc_metadata *sz_tsc;
//for pastri
#ifdef PASTRI
extern pastri_params pastri_par;
#endif
void SZ_Finalize();

View File

@ -6,7 +6,6 @@
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "DynamicFloatArray.h"
#ifndef _SZ_Float_H
#define _SZ_Float_H
@ -14,13 +13,11 @@
#ifdef __cplusplus
extern "C" {
#endif
unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize);
void computeReqLength_float(double realPrecision, short radExpo, int* reqLength, float* medianValue);
unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision);
unsigned int optimize_intervals_and_compute_dense_position_float_1D(float *oriData, size_t dataLength, double realPrecision, float * dense_pos);
unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision);
TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData,
@ -29,13 +26,6 @@ size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_
bool SZ_compress_args_float_NoCkRngeNoGzip_1D( unsigned char* newByteData, float *oriData,
size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f);
size_t SZ_compress_float_1D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data);
size_t SZ_compress_float_1D_MDQ_RA_block_1D_pred(float * block_ori_data, float * mean, float dense_pos, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, DynamicFloatArray * unpredictable_data);
void SZ_blocked_regression(float * block_ori_data, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, float *params);
unsigned char * SZ_compress_float_1D_MDQ_RA(float *oriData, size_t r1, double realPrecision, size_t * comp_size);
void SZ_compress_args_float_withinRange(unsigned char* newByteData, float *oriData, size_t dataLength, size_t *outSize);

View File

@ -1,68 +0,0 @@
//make header C++/C inter-operable
#ifdef __cplusplus
extern "C" {
#endif
#ifndef SZ_OPENCL_H
#define SZ_OPENCL_H
#include<stddef.h>
//opaque pointer for opencl state
struct sz_opencl_state;
/**
* creates an opencl state for multiple uses of the compressor or
* returns an error code.
*
* \post if return code is SZ_NCES, the state object may only be passed to
* sz_opencl_release or sz_opencl_error_* otherwise it may be used in any
* sz_opencl_* function.
*
* \param[out] state the sz opencl state
* \return SZ_SUCCESS for success or SZ_NCES on error
*/
int sz_opencl_init(struct sz_opencl_state** state);
/**
* deinitializes an opencl state
*
* \param[in] state the sz opencl state
* \return SZ_SUCCESS
*/
int sz_opencl_release(struct sz_opencl_state** state);
/**
* returns a human readable error message for the last error recieved by state
*
* \param[in] state the sz opencl state
* \return a pointer to a string that describes the error
*/
const char* sz_opencl_error_msg(struct sz_opencl_state* state);
/**
* returns a numeric code for the last error recieved by state
*
* \param[in] state the sz opencl state
* \return the numeric error code
*/
int sz_opencl_error_code(struct sz_opencl_state* state);
/**
* confirms that the sz opencl state is ready to use by performing a vector addition
*
* \param[in] state the sz opencl state
* \return SZ_SUCCESS if the opencl implementation is functioning
*/
int sz_opencl_check(struct sz_opencl_state*);
unsigned char* sz_compress_float3d_opencl(float* data, size_t r1, size_t r2, size_t r3, double, size_t* out_size);
#endif /* SZ_OPENCL_H */
//make header C++/C inter-operable
#ifdef __cplusplus
}
#endif

View File

@ -1,58 +0,0 @@
/**
* @file ByteToolkit.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the ByteToolkit.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _STATS_H
#define _STATS_H
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct sz_stats
{
int use_mean;
size_t blockSize;
float lorenzoPercent;
float regressionPercent;
size_t lorenzoBlocks;
size_t regressionBlocks;
size_t totalBlocks;
//size_t huffmanTreeHeight;
size_t huffmanTreeSize; //before the final zstd
size_t huffmanCodingSize; //before the final zstd
float huffmanCompressionRatio;
int huffmanNodeCount;
size_t unpredictCount;
float unpredictPercent;
float zstdCompressionRatio; //not available yet
} sz_stats;
extern sz_stats sz_stat;
void writeBlockInfo(int use_mean, size_t blockSize, size_t regressionBlocks, size_t totalBlocks);
void writeHuffmanInfo(size_t huffmanTreeSize, size_t huffmanCodingSize, size_t totalDataSize, int huffmanNocdeCount);
void writeZstdCompressionRatio(float zstdCompressionRatio);
void writeUnpredictDataCounts(size_t unpredictCount, size_t totalNumElements);
void printSZStats();
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _STATS_H ----- */

View File

@ -1,692 +0,0 @@
/**
* @file ArithmeticCoding.c
* @author Sheng Di, Mark Thomas Nelson
* @date April, 2016
* @brief Byte Toolkit
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
* (C) The MIT License (MIT), this code was modified from Mark's arithmetic coding code: http://www.drdobbs.com/cpp/data-compression-with-arithmetic-encodin/240169251?pgno=1
*/
#include <sz.h>
#include <ArithmeticCoding.h>
inline void output_bit_1(unsigned int* buf)
{
(*buf) = (*buf) << 1;
(*buf) |= 1;
}
inline void output_bit_0(unsigned int* buf)
{
(*buf) = (*buf) << 1;
//(*byte) |= 0; //actually doesn't have to set the bit to 0
}
//TODO: problematic
inline unsigned int output_bit_1_plus_pending(int pending_bits)
{
unsigned int buf = 0, pbits = pending_bits;
output_bit_1(&buf);
while(pbits--)
output_bit_0(&buf);
buf = buf << (32-(pending_bits+1)); //alignment to the left leading bit, which would be easier for the final output
return buf;
}
inline unsigned int output_bit_0_plus_pending(int pending_bits)
{
unsigned int buf = 0, pbits = pending_bits;
//output_bit_0(&buf);
while(pbits--)
output_bit_1(&buf);
buf = buf << (32-(pending_bits+1)); //alignment to the left leading bit
return buf;
}
/**
* Create AriCoder for the following arithmetic encoding operation.
* In this function, it will compute the real frequency of the integer codes.
* @param int numOfStates (input): numOfStates is the real # states calculated to the optimization_num_of_interval code
* @param int *s (input): the integer code array (i.e., type_array generated by prediction+quantization)
* @param size_t length: the number of integer codes in the type_array
*
* */
AriCoder *createAriCoder(int numOfStates, int *s, size_t length)
{
AriCoder *ariCoder = (AriCoder*)malloc(sizeof(AriCoder));
memset(ariCoder, 0, sizeof(AriCoder));
ariCoder->numOfRealStates = numOfStates;
ari_init(ariCoder, s, length);
return ariCoder;
}
void freeAriCoder(AriCoder *ariCoder)
{
free(ariCoder->cumulative_frequency);
free(ariCoder);
}
void ari_init(AriCoder *ariCoder, int *s, size_t length)
{
size_t i; //# states is in the range of integer.
int index = 0;
size_t *freq = (size_t *)malloc(ariCoder->numOfRealStates*sizeof(size_t));
memset(freq, 0, ariCoder->numOfRealStates*sizeof(size_t));
for(i = 0;i < length;i++)
{
index = s[i];
freq[index]++;
}
int counter = 0;
size_t _sum = 0, sum = 0, freqDiv = 0;
ariCoder->cumulative_frequency = (Prob *)malloc(ariCoder->numOfRealStates*sizeof(Prob));
memset(ariCoder->cumulative_frequency, 0, ariCoder->numOfRealStates*sizeof(Prob));
if(length <= MAX_INTERVALS)
{
for (index = 0; index < ariCoder->numOfRealStates; index++)
{
if (freq[index])
{
sum += freq[index];
(ariCoder->cumulative_frequency[index]).low = _sum;
(ariCoder->cumulative_frequency[index]).high = sum;
(ariCoder->cumulative_frequency[index]).state = index;
_sum = sum;
counter++;
}
}
ariCoder->numOfValidStates = counter;
ariCoder->total_frequency = sum;
}
else
{
int intvSize = length%MAX_INTERVALS==0?length/MAX_INTERVALS:length/MAX_INTERVALS+1;
for (index = 0; index < ariCoder->numOfRealStates; index++)
{
if (freq[index])
{
freqDiv = freq[index]/intvSize; //control the sum of frequency to be no greater than MAX_INTERVALS
if(freqDiv==0)
freqDiv = 1;
sum += freqDiv;
(ariCoder->cumulative_frequency[index]).low = _sum;
(ariCoder->cumulative_frequency[index]).high = sum;
(ariCoder->cumulative_frequency[index]).state = index;
_sum = sum;
counter++;
}
}
ariCoder->numOfValidStates = counter;
ariCoder->total_frequency = sum;
}
free(freq);
}
/**
* Convert AriCoder to bytes for storage
* @param AriCoder* ariCoder (input)
* @param unsigned char** out (output)
*
* @return outSize
* */
unsigned int pad_ariCoder(AriCoder* ariCoder, unsigned char** out)
{
int numOfRealStates = ariCoder->numOfRealStates;
int numOfValidStates = ariCoder->numOfValidStates;
uint64_t total_frequency = ariCoder->total_frequency;
Prob* cumulative_frequency = ariCoder->cumulative_frequency;
unsigned int outSize = 0;
*out = (unsigned char*)malloc(2*sizeof(int)+sizeof(uint64_t)+sizeof(Prob)*numOfRealStates);
unsigned char* p = *out;
intToBytes_bigEndian(p, numOfRealStates);
p+=sizeof(int);
intToBytes_bigEndian(p, numOfValidStates);
p+=sizeof(int);
int64ToBytes_bigEndian(p, total_frequency);
p+=sizeof(uint64_t);
size_t i = 0;
if(total_frequency <= 65536)
{
uint16_t low, high;
if(numOfRealStates<=256)
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint16_t)(cumulative_frequency[i].high);
if(high!=0) //if this state cell is not null
{
low = (uint16_t)(cumulative_frequency[i].low);
int16ToBytes_bigEndian(p,low);
p+=sizeof(uint16_t);
int16ToBytes_bigEndian(p,high);
p+=sizeof(uint16_t);
*(p++)=(unsigned char)cumulative_frequency[i].state;
//if(((unsigned char)cumulative_frequency[i].state)==129)
// printf("break i=%zu\n", i);
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*5; //2*sizeof(uint16_t)+1
}
else if(numOfRealStates<=65536)
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint16_t)(cumulative_frequency[i].high);
if(high!=0)
{
low = (uint16_t)(cumulative_frequency[i].low);
int16ToBytes_bigEndian(p,low);
p+=sizeof(uint16_t);
int16ToBytes_bigEndian(p,high);
p+=sizeof(uint16_t);
uint16_t state = (uint16_t)cumulative_frequency[i].state;
int16ToBytes_bigEndian(p, state);
p+=sizeof(uint16_t);
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*6;
}
else
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint16_t)(cumulative_frequency[i].high);
if(high!=0)
{
low = (uint16_t)(cumulative_frequency[i].low);
int16ToBytes_bigEndian(p,low);
p+=sizeof(uint16_t);
int16ToBytes_bigEndian(p,high);
p+=sizeof(uint16_t);
int32ToBytes_bigEndian(p, cumulative_frequency[i].state);
p+=sizeof(uint32_t);
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*8;
}
}
else if(total_frequency <=4294967296)
{
uint32_t low, high;
if(numOfRealStates<=256)
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint32_t)(cumulative_frequency[i].high);
if(high!=0)
{
low = (uint32_t)(cumulative_frequency[i].low);
int32ToBytes_bigEndian(p,low);
p+=sizeof(uint32_t);
int32ToBytes_bigEndian(p,high);
p+=sizeof(uint32_t);
*(p++)=(unsigned char)cumulative_frequency[i].state;
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*9;
}
else if(numOfRealStates<=65536)
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint32_t)(cumulative_frequency[i].high);
if(high!=0)
{
low = (uint32_t)(cumulative_frequency[i].low);
int32ToBytes_bigEndian(p,low);
p+=sizeof(uint32_t);
int32ToBytes_bigEndian(p,high);
p+=sizeof(uint32_t);
uint16_t state = (uint16_t)cumulative_frequency[i].state;
int16ToBytes_bigEndian(p, state);
p+=sizeof(uint16_t);
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*10;
}
else
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint32_t)(cumulative_frequency[i].high);
if(high!=0)
{
low = (uint32_t)(cumulative_frequency[i].low);
int32ToBytes_bigEndian(p,low);
p+=sizeof(uint32_t);
int32ToBytes_bigEndian(p,high);
p+=sizeof(uint32_t);
int32ToBytes_bigEndian(p, cumulative_frequency[i].state);
p+=sizeof(uint32_t);
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*12;
}
}
else
{
uint64_t low, high;
if(numOfRealStates<=256)
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint64_t)(cumulative_frequency[i].high);
if(high!=0)
{
low = (uint64_t)(cumulative_frequency[i].low);
int64ToBytes_bigEndian(p,low);
p+=sizeof(uint64_t);
int64ToBytes_bigEndian(p,high);
p+=sizeof(uint64_t);
*(p++)=(unsigned char)cumulative_frequency[i].state;
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*17;
}
else if(numOfRealStates<=65536)
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint64_t)(cumulative_frequency[i].high);
if(high!=0)
{
low = (uint64_t)(cumulative_frequency[i].low);
int64ToBytes_bigEndian(p,low);
p+=sizeof(uint64_t);
int64ToBytes_bigEndian(p,high);
p+=sizeof(uint64_t);
uint16_t state = (uint16_t)cumulative_frequency[i].state;
int16ToBytes_bigEndian(p, state);
p+=sizeof(uint16_t);
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*18;
}
else
{
for(i=0;i<numOfRealStates;i++)
{
high = (uint64_t)(cumulative_frequency[i].high);
if(high!=0)
{
low = (uint64_t)(cumulative_frequency[i].low);
int64ToBytes_bigEndian(p,low);
p+=sizeof(uint64_t);
int64ToBytes_bigEndian(p,high);
p+=sizeof(uint64_t);
int32ToBytes_bigEndian(p, cumulative_frequency[i].state);
p+=sizeof(uint32_t);
}
}
outSize = 2*sizeof(int)+sizeof(uint64_t)+ariCoder->numOfValidStates*20;
}
}
return outSize;
}
/**
* Reconstruct AriCoder based on the bytes loaded from compressed data
* @param AriCoder** ariCoder (ourput)
* @param unsigned char* bytes (input)
*
* @return offset
* */
int unpad_ariCoder(AriCoder** ariCoder, unsigned char* bytes)
{
int offset = 0;
*ariCoder = (AriCoder*)malloc(sizeof(AriCoder));
memset(*ariCoder, 0, sizeof(AriCoder));
unsigned char *p = bytes;
int numOfRealStates = (*ariCoder)->numOfRealStates = bytesToInt_bigEndian(p);
p += sizeof(int);
int numOfValidStates = (*ariCoder)->numOfValidStates = bytesToInt_bigEndian(p);
p += sizeof(int);
size_t total_frequency = (*ariCoder)->total_frequency = bytesToInt64_bigEndian(p);
p += sizeof(uint64_t);
(*ariCoder)->cumulative_frequency = (Prob*)malloc((*ariCoder)->numOfRealStates*sizeof(Prob));
memset((*ariCoder)->cumulative_frequency, 0, (*ariCoder)->numOfRealStates*sizeof(Prob));
size_t i = 0;
unsigned char *low_p = NULL, *high_p = NULL, *state_p = NULL;
int state = 0;
if(total_frequency <= 65536)
{
if(numOfRealStates<=256)
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint16_t);
state_p = high_p+sizeof(uint16_t);
state = *state_p;
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + 1;
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*5; //2*sizeof(uint16_t)+1
}
else if(numOfRealStates<=65536)
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint16_t);
state_p = high_p+sizeof(uint16_t);
state = bytesToUInt16_bigEndian(state_p);
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + sizeof(uint16_t);
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*6;
}
else
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint16_t);
state_p = high_p+sizeof(uint16_t);
state = bytesToUInt32_bigEndian(state_p);
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + sizeof(uint32_t);
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*8;
}
}
else if(total_frequency <=4294967296)
{
if(numOfRealStates<=256)
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint32_t);
state_p = high_p+sizeof(uint32_t);
state = *state_p;
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + 1;
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*9;
}
else if(numOfRealStates<=65536)
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint32_t);
state_p = high_p+sizeof(uint32_t);
state = bytesToUInt16_bigEndian(state_p);
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + sizeof(uint16_t);
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*10;
}
else
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint32_t);
state_p = high_p+sizeof(uint32_t);
state = bytesToUInt32_bigEndian(state_p);
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + sizeof(uint32_t);
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*12;
}
}
else
{
if(numOfRealStates<=256)
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint64_t);
state_p = high_p+sizeof(uint64_t);
state = *state_p;
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt64_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + 1;
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*17;
}
else if(numOfRealStates<=65536)
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint64_t);
state_p = high_p+sizeof(uint64_t);
state = bytesToUInt16_bigEndian(state_p);
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt64_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + sizeof(uint16_t);
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*18;
}
else
{
for(i=0;i<numOfValidStates;i++)
{
low_p = p;
high_p = low_p+sizeof(uint64_t);
state_p = high_p+sizeof(uint64_t);
state = bytesToUInt32_bigEndian(state_p);
(*ariCoder)->cumulative_frequency[state].low = bytesToUInt64_bigEndian(low_p);
(*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p);
(*ariCoder)->cumulative_frequency[state].state = state;
p = state_p + sizeof(uint32_t);
}
offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*20;
}
}
return offset;
}
/**
* Arithmetic Encoding
* @param AriCoder *ariCoder (input)
* @param int *s (input)
* @param size_t length (input)
* @param unsigned char *out (output)
* @param size_t *outSize (output)
*
* */
void ari_encode(AriCoder *ariCoder, int *s, size_t length, unsigned char *out, size_t *outSize)
{
int pending_bits = 0;
size_t low = 0;
size_t high = MAX_CODE;
size_t i = 0, range = 0;
size_t count = ariCoder->total_frequency;
int c = 0, lackBits = 0;
*outSize = 0;
unsigned char *outp = out;
Prob *cumulative_frequency = ariCoder->cumulative_frequency;
unsigned int buf = 0;
for (i=0;i<length;i++)
{
c = s[i];
Prob p = cumulative_frequency[c];
range = high - low + 1;
high = low + (range * p.high / count) - 1;
low = low + (range * p.low / count);
for ( ; ; )
{
if ( high < ONE_HALF )
{
buf = output_bit_0_plus_pending(pending_bits);
put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize);
pending_bits = 0;
}
else if ( low >= ONE_HALF )
{
buf = output_bit_1_plus_pending(pending_bits);
put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize);
pending_bits = 0;
}
else if ( low >= ONE_FOURTH && high < THREE_FOURTHS )
{
pending_bits++;
low -= ONE_FOURTH;
high -= ONE_FOURTH;
} else
break;
high <<= 1;
high++;
low <<= 1;
high &= MAX_CODE;
low &= MAX_CODE;
}
}
pending_bits++;
if(low < ONE_FOURTH)
{
buf = output_bit_0_plus_pending(pending_bits);
put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize);
}
else
{
buf = output_bit_1_plus_pending(pending_bits);
put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize);
}
}
/**
* Get the integer code based on Arithmetic Coding Value
* @param AriCoder *ariCoder (input)
* @param size_t scaled_value (input)
*
* @return Prob* (output)
*
* */
Prob* getCode(AriCoder *ariCoder, size_t scaled_value)
{
int numOfRealStates = ariCoder->numOfRealStates;
int i = 0;
Prob *p = ariCoder->cumulative_frequency;
for(i=0;i<numOfRealStates;i++,p++)
{
if(scaled_value < p->high)
break;
}
return p;
}
/**
* Get one bit from the input stream of bytes
* @param unsigned char* p (input): the current location to be read (byte) of the byte stream
* @param int offset (input): the offset of the specified byte in the byte stream
*
* @return unsigned char (output) : 1 or 0
* */
inline unsigned char get_bit(unsigned char* p, int offset)
{
return ((*p) >> (7-offset)) & 0x01;
}
/**
* Arithmetic Decoding algorithm
* @param AriCoder *ariCoder (input): the encoder with the constructed frequency information
* @param unsigned char *s (input): the compressed stream of bytes
* @param size_t s_len (input): the number of bytes in the 'unsigned char *s'
* @param size_t targetLength (input): the target number of elements in the type array
* @param int *out (output) : the result (type array decompressed from the stream 's')
*
* */
void ari_decode(AriCoder *ariCoder, unsigned char *s, size_t s_len, size_t targetLength, int *out)
{
size_t high = MAX_CODE;
size_t low = 0, i = 0;
size_t range = 0, scaled_value = 0;
size_t total_frequency = ariCoder->total_frequency;
unsigned char *sp = s+5;
unsigned int offset = 4;
size_t value = (bytesToUInt64_bigEndian(s) >> 20); //alignment with the MAX_CODE
size_t s_counter = sizeof(int);
for(i=0;i<targetLength;i++)
{
range = high - low + 1;
scaled_value = ((value - low + 1) * ariCoder->total_frequency - 1 ) / range;
Prob *p = getCode(ariCoder, scaled_value);
out[i] = p->state; //output the state to the 'out' array
high = low + (range*p->high)/total_frequency -1;
low = low + (range*p->low)/total_frequency;
for( ; ; )
{
if (high < ONE_HALF) {
//do nothing, bit is a zero
} else if ( low >= ONE_HALF )
{
value -= ONE_HALF; //subtract one half from all three code values
low -= ONE_HALF;
high -= ONE_HALF;
} else if ( low >= ONE_FOURTH && high < THREE_FOURTHS )
{
value -= ONE_FOURTH;
low -= ONE_FOURTH;
high -= ONE_FOURTH;
} else
break;
low <<= 1;
high <<= 1;
high++;
value <<= 1;
//load one bit from the input byte stream
if(s_counter < s_len)
{
value += get_bit(sp, offset++);
if(offset==8)
{
sp++;
s_counter++;
offset = 0;
}
}
}
}
}

View File

@ -1,100 +0,0 @@
/**
* @file CacheTable.c
* @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
* @date Jan, 2019
* @brief Cache Table
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdlib.h>
#include "CacheTable.h"
double* g_CacheTable;
uint32_t * g_InverseTable;
uint32_t baseIndex;
uint32_t topIndex;
int bits;
inline int doubleGetExpo(double d){
long* ptr = (long*)&d;
*ptr = ((*ptr) >> 52) - 1023;
return *ptr;
}
int CacheTableGetRequiredBits(double precision, int quantization_intervals){
double min_distance = pow((1+precision), -(quantization_intervals>>1)) * precision;
return -(doubleGetExpo(min_distance));
}
inline uint32_t CacheTableGetIndex(float value, int bits){
uint32_t* ptr = (uint32_t*)&value;
int shift = 32 - 9 - bits;
if(shift>0){
return (*ptr) >> shift;
}else{
return 0;
}
}
inline uint64_t CacheTableGetIndexDouble(double value, int bits){
uint64_t* ptr = (uint64_t*)&value;
int shift = 64 - 12 - bits;
if(shift>0){
return (*ptr) >> shift;
}else{
return 0;
}
}
inline int CacheTableIsInBoundary(uint32_t index){
if(index <= topIndex && index > baseIndex){
return 1;
}else{
return 0;
}
}
void CacheTableBuild(double * table, int count, double smallest, double largest, double precision, int quantization_intervals){
bits = CacheTableGetRequiredBits(precision, quantization_intervals);
baseIndex = CacheTableGetIndex((float)smallest, bits)+1;
topIndex = CacheTableGetIndex((float)largest, bits);
uint32_t range = topIndex - baseIndex + 1;
g_InverseTable = (uint32_t *)malloc(sizeof(uint32_t) * range);
/*
uint32_t fillInPos = 0;
for(int i=0; i<count; i++){
if(i == 0){
continue;
}
uint32_t index = CacheTableGetIndex((float)table[i], bits) - baseIndex;
g_InverseTable[index] = i;
if(index > fillInPos){
for(int j=fillInPos; j<index; j++){
g_InverseTable[j] = g_InverseTable[index];
}
}
fillInPos = index + 1;
}
*/
for(int i=count-1; i>0; i--){
uint32_t upperIndex = CacheTableGetIndex((float)table[i]*(1+precision), bits);
uint32_t lowerIndex = CacheTableGetIndex((float)table[i]/(1+precision), bits);
for(uint32_t j = lowerIndex; j<=upperIndex; j++){
if(j<baseIndex || j >topIndex){
continue;
}
g_InverseTable[j-baseIndex] = i;
}
}
}
inline uint32_t CacheTableFind(uint32_t index){
return g_InverseTable[index-baseIndex];
}
void CacheTableFree(){
free(g_InverseTable);
}

View File

@ -1,57 +0,0 @@
/**
* @file DynamicFloatArray.c
* @author Sheng Di
* @date May, 2016
* @brief Dynamic Float Array
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "DynamicDoubleArray.h"
void new_DDA(DynamicDoubleArray **dda, size_t cap) {
*dda = (DynamicDoubleArray *)malloc(sizeof(DynamicDoubleArray));
(*dda)->size = 0;
(*dda)->capacity = cap;
(*dda)->array = (double*)malloc(sizeof(double)*cap);
}
void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data)
{
size_t size = dba->size;
if(size>0)
*data = (double*)malloc(size * sizeof(double));
else
*data = NULL;
memcpy(*data, dba->array, size*sizeof(double));
}
void free_DDA(DynamicDoubleArray *dda)
{
free(dda->array);
free(dda);
}
double getDDA_Data(DynamicDoubleArray *dda, size_t pos)
{
if(pos>=dda->size)
{
printf("Error: wrong position of DIA.\n");
exit(0);
}
return dda->array[pos];
}
void addDDA_Data(DynamicDoubleArray *dda, double value)
{
if(dda->size==dda->capacity)
{
dda->capacity *= 2;
dda->array = (double *)realloc(dda->array, dda->capacity*sizeof(double));
}
dda->array[dda->size] = value;
dda->size ++;
}

View File

@ -1,57 +0,0 @@
/**
* @file DynamicFloatArray.c
* @author Sheng Di
* @date May, 2016
* @brief Dynamic Float Array
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "DynamicFloatArray.h"
void new_DFA(DynamicFloatArray **dfa, size_t cap) {
*dfa = (DynamicFloatArray *)malloc(sizeof(DynamicFloatArray));
(*dfa)->size = 0;
(*dfa)->capacity = cap;
(*dfa)->array = (float*)malloc(sizeof(float)*cap);
}
void convertDFAtoFloats(DynamicFloatArray *dfa, float **data)
{
size_t size = dfa->size;
if(size>0)
*data = (float*)malloc(size * sizeof(float));
else
*data = NULL;
memcpy(*data, dfa->array, size*sizeof(float));
}
void free_DFA(DynamicFloatArray *dfa)
{
free(dfa->array);
free(dfa);
}
float getDFA_Data(DynamicFloatArray *dfa, size_t pos)
{
if(pos>=dfa->size)
{
printf("Error: wrong position of DIA.\n");
exit(0);
}
return dfa->array[pos];
}
void addDFA_Data(DynamicFloatArray *dfa, float value)
{
if(dfa->size==dfa->capacity)
{
dfa->capacity *= 2;
dfa->array = (float *)realloc(dfa->array, dfa->capacity*sizeof(float));
}
dfa->array[dfa->size] = value;
dfa->size++;
}

View File

@ -1,193 +0,0 @@
/**
* @file MultiLevelCacheTable.c
* @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
* @date Jan, 2019
* @brief Header file.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdint.h>
#include <memory.h>
#include <stdlib.h>
#include "stdio.h"
#include "MultiLevelCacheTable.h"
uint8_t MLCT_GetExpoIndex(float value){
uint32_t* ptr = (uint32_t*)&value;
return (*ptr) >> 23;
}
uint8_t MLCT_GetRequiredBits(float precision){
int32_t* ptr = (int32_t*)&precision;
return -(((*ptr) >> 23) - 127);
}
uint32_t MLCT_GetMantiIndex(float value, int bits){
uint32_t* ptr = (uint32_t*)&value;
(*ptr) = (*ptr) << 9 >> 9;
int shift = 32 - 9 - bits;
if(shift > 0){
return (*ptr) >> shift;
}else{
return (*ptr);
}
}
float MLTC_RebuildFloat(uint8_t expo, uint32_t manti, int bits){
float result = 0;
uint32_t *ptr = (uint32_t*)&result;
*ptr = expo;
(*ptr) = (*ptr) << 23;
(*ptr) |= (manti << (23-bits));
return result;
}
void MultiLevelCacheTableBuild(struct TopLevelTable* topTable, float* precisionTable, int count, float precision){
uint8_t bits = MLCT_GetRequiredBits(precision);
topTable->bits = bits;
topTable->bottomBoundary = precisionTable[1]/(1+precision);
topTable->topBoundary = precisionTable[count-1]/(1-precision);
topTable->baseIndex = MLCT_GetExpoIndex(topTable->bottomBoundary);
topTable->topIndex = MLCT_GetExpoIndex(topTable->topBoundary);
int subTableCount = topTable->topIndex - topTable->baseIndex + 1;
topTable->subTables = (struct SubLevelTable*)malloc(sizeof(struct SubLevelTable) * subTableCount);
memset(topTable->subTables, 0, sizeof(struct SubLevelTable) * subTableCount);
//uint32_t expoBoundary[subTableCount];
uint8_t lastExpo = 0xff;
uint8_t lastIndex = 0;
for(int i=0; i<count; i++){
uint8_t expo = MLCT_GetExpoIndex(precisionTable[i]);
if(expo != lastExpo){
//expoBoundary[lastIndex] = i;
lastExpo = expo;
lastIndex++;
}
}
for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--){
struct SubLevelTable* processingSubTable = &topTable->subTables[i];
if(i == topTable->topIndex - topTable->baseIndex &&
MLCT_GetExpoIndex(topTable->topBoundary) == MLCT_GetExpoIndex(precisionTable[count-1])){
processingSubTable->topIndex = MLCT_GetMantiIndex(topTable->topBoundary, bits) - 1;
}else{
uint32_t maxIndex = 0;
for(int j=0; j<bits; j++){
maxIndex += 1 << j;
}
processingSubTable->topIndex = maxIndex;
}
if(i == 0 && MLCT_GetExpoIndex(topTable->bottomBoundary) == MLCT_GetExpoIndex(precisionTable[0])){
processingSubTable->baseIndex = MLCT_GetMantiIndex(topTable->bottomBoundary, bits)+1;
}else{
processingSubTable->baseIndex = 0;
}
int subTableLength = processingSubTable->topIndex - processingSubTable-> baseIndex+ 1;
processingSubTable->table = (uint32_t*)malloc(sizeof(uint32_t) * subTableLength);
memset(processingSubTable->table, 0, sizeof(uint32_t) * subTableLength);
processingSubTable->expoIndex = topTable->baseIndex + i;
}
uint32_t index = 1;
for(uint8_t i = 0; i<=topTable->topIndex-topTable->baseIndex; i++){
struct SubLevelTable* processingSubTable = &topTable->subTables[i];
uint8_t expoIndex = i+topTable->baseIndex;
for(uint32_t j = 0; j<=processingSubTable->topIndex - processingSubTable->baseIndex; j++){
uint32_t mantiIndex = j+processingSubTable->baseIndex;
float sample = MLTC_RebuildFloat(expoIndex, mantiIndex, topTable->bits);
float bottomBoundary = precisionTable[index] / (1+precision);
float topBoundary = precisionTable[index] / (1-precision);
if(sample < topBoundary && sample > bottomBoundary){
processingSubTable->table[j] = index;
}else{
//float newPrecision = precisionTable[index];
index++;
processingSubTable->table[j] = index;
if(j)
processingSubTable->table[j-1] = index;
else{
struct SubLevelTable* pastSubTable = &topTable->subTables[i-1];
pastSubTable->table[pastSubTable->topIndex - pastSubTable->baseIndex] = index;
}
}
}
if(i == topTable->topIndex - topTable->baseIndex){
uint32_t j = processingSubTable->topIndex - processingSubTable->baseIndex + 1;
uint32_t mantiIndex = j + processingSubTable->baseIndex;
float sample = MLTC_RebuildFloat(expoIndex, mantiIndex, topTable->bits);
float bottomBoundary = precisionTable[index] / (1+precision);
float topBoundary = precisionTable[index] / (1-precision);
if(sample > topBoundary || sample < bottomBoundary){
index++;
processingSubTable->table[j-1] = index;
}
}
}
/*
long lastIndexInExpoRange = count-1;
bool trigger = false;
float preRange = 0.0;
uint32_t preIndex = 0;
for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--){
struct SubLevelTable* processingSubTable = &topTable->subTables[i];
if(trigger){
uint32_t bound = MLCT_GetMantiIndex(preRange, bits);
for(int j = processingSubTable->topIndex; j>=processingSubTable->baseIndex; j--){
if(j >= bound){
processingSubTable->table[j-processingSubTable->baseIndex] = preIndex;
}else{
break;
}
}
trigger = false;
}
long firstIndexInExpoRange = expoBoundary[i];
uint8_t expoInRange = MLCT_GetExpoIndex(precisionTable[firstIndexInExpoRange]);
for(int j=lastIndexInExpoRange; j>=firstIndexInExpoRange; j--){
float test = precisionTable[j];
uint32_t rangeTop = MLCT_GetMantiIndex(precisionTable[j]*(1+precision), bits) - 1;
uint32_t rangeBottom;
if(j == firstIndexInExpoRange){
preRange = precisionTable[j]/(1+precision);
if(expoInRange != MLCT_GetExpoIndex(preRange)){
trigger = true;
preIndex = firstIndexInExpoRange;
rangeBottom = 0;
}else{
rangeBottom= MLCT_GetMantiIndex(precisionTable[j]/(1+precision), bits) + 1;
}
}else{
rangeBottom= MLCT_GetMantiIndex(precisionTable[j]/(1+precision), bits) + 1;
}
for(int k = rangeBottom; k<=rangeTop; k++){
if( k <= processingSubTable->topIndex && k >= processingSubTable->baseIndex)
processingSubTable->table[k - processingSubTable->baseIndex] = j;
}
}
lastIndexInExpoRange = firstIndexInExpoRange-1;
}
*/
}
uint32_t MultiLevelCacheTableGetIndex(float value, struct TopLevelTable* topLevelTable){
uint8_t expoIndex = MLCT_GetExpoIndex(value);
if(expoIndex <= topLevelTable->topIndex && expoIndex >= topLevelTable->baseIndex){
struct SubLevelTable* subLevelTable = &topLevelTable->subTables[expoIndex-topLevelTable->baseIndex];
uint32_t mantiIndex = MLCT_GetMantiIndex(value, topLevelTable->bits);
MLTC_RebuildFloat(expoIndex, mantiIndex, topLevelTable->bits);
if(mantiIndex >= subLevelTable->baseIndex && mantiIndex <= subLevelTable->topIndex)
return subLevelTable->table[mantiIndex - subLevelTable->baseIndex];
}
return 0;
}
void MultiLevelCacheTableFree(struct TopLevelTable* table){
for(int i=0; i<table->topIndex - table->baseIndex + 1; i++){
free(table->subTables[i].table);
}
free(table->subTables);
}

View File

@ -1,125 +0,0 @@
/**
* @file MultiLevelCacheTableWideInterval.h
* @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao
* @date Jan, 2019
* @brief Header file.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdbool.h>
#include "MultiLevelCacheTableWideInterval.h"
void freeTopLevelTableWideInterval(struct TopLevelTableWideInterval* topTable)
{
for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--)
{
struct SubLevelTableWideInterval* processingSubTable = &topTable->subTables[i];
free(processingSubTable->table);
}
free(topTable->subTables);
}
uint16_t MLCTWI_GetExpoIndex(double value){
uint64_t* ptr = (uint64_t*)&value;
return (*ptr) >> 52;
}
uint16_t MLCTWI_GetRequiredBits(double precision){
uint64_t* ptr = (uint64_t*)&precision;
return -(((*ptr) >> 52) - 1023);
}
uint64_t MLCTWI_GetMantiIndex(double value, int bits){
uint64_t* ptr = (uint64_t*)&value;
(*ptr) = (*ptr) << 12 >> 12;
int shift = 64 - 12 - bits;
if(shift > 0){
return (*ptr) >> shift;
}else{
return (*ptr);
}
}
double MLTCWI_RebuildDouble(uint16_t expo, uint64_t manti, int bits){
double result = 0;
uint64_t *ptr = (uint64_t*)&result;
*ptr = expo;
(*ptr) = (*ptr) << 52;
(*ptr) += (manti << (52-bits));
return result;
}
void MultiLevelCacheTableWideIntervalBuild(struct TopLevelTableWideInterval* topTable, double* precisionTable, int count, double precision, int plus_bits){
uint16_t bits = MLCTWI_GetRequiredBits(precision) + plus_bits;
topTable->bits = bits;
topTable->bottomBoundary = precisionTable[1]/(1+precision);
topTable->topBoundary = precisionTable[count-1]/(1-precision);
topTable->baseIndex = MLCTWI_GetExpoIndex(topTable->bottomBoundary);
topTable->topIndex = MLCTWI_GetExpoIndex(topTable->topBoundary);
int subTableCount = topTable->topIndex - topTable->baseIndex + 1;
topTable->subTables = (struct SubLevelTableWideInterval*)malloc(sizeof(struct SubLevelTableWideInterval) * subTableCount);
memset(topTable->subTables, 0, sizeof(struct SubLevelTableWideInterval) * subTableCount);
for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--){
struct SubLevelTableWideInterval* processingSubTable = &topTable->subTables[i];
uint32_t maxIndex = 0;
for(int j=0; j<bits; j++){
maxIndex += 1 << j;
}
processingSubTable->topIndex = maxIndex;
processingSubTable->baseIndex = 0;
uint64_t subTableLength = processingSubTable->topIndex - processingSubTable-> baseIndex+ 1;
processingSubTable->table = (uint16_t*)malloc(sizeof(uint16_t) * subTableLength);
memset(processingSubTable->table, 0, sizeof(uint16_t) * subTableLength);
processingSubTable->expoIndex = topTable->baseIndex + i;
}
uint32_t index = 0;
bool flag = false;
for(uint16_t i = 0; i<=topTable->topIndex-topTable->baseIndex; i++){
struct SubLevelTableWideInterval* processingSubTable = &topTable->subTables[i];
uint16_t expoIndex = i+topTable->baseIndex;
for(uint32_t j = 0; j<=processingSubTable->topIndex - processingSubTable->baseIndex; j++){
uint64_t mantiIndex = j + processingSubTable->baseIndex;
double sampleBottom = MLTCWI_RebuildDouble(expoIndex, mantiIndex, topTable->bits);
double sampleTop = MLTCWI_RebuildDouble(expoIndex, mantiIndex+1, topTable->bits);
double bottomBoundary = precisionTable[index] / (1+precision);
double topBoundary = precisionTable[index] / (1-precision);
if(sampleTop < topBoundary && sampleBottom > bottomBoundary){
processingSubTable->table[j] = index;
flag = true;
}else{
if(flag && index < count-1){
index++;
processingSubTable->table[j] = index;
}else{
processingSubTable->table[j] = 0;
}
}
}
}
}
uint32_t MultiLevelCacheTableWideIntervalGetIndex(double value, struct TopLevelTableWideInterval* topLevelTable){
uint16_t expoIndex = MLCTWI_GetExpoIndex(value);
if(expoIndex <= topLevelTable->topIndex && expoIndex >= topLevelTable->baseIndex){
struct SubLevelTableWideInterval* subLevelTable = &topLevelTable->subTables[expoIndex-topLevelTable->baseIndex];
uint64_t mantiIndex = MLCTWI_GetMantiIndex(value, topLevelTable->bits);
return subLevelTable->table[mantiIndex - subLevelTable->baseIndex];
}
return 0;
}
void MultiLevelCacheTableWideIntervalFree(struct TopLevelTableWideInterval* table){
for(int i=0; i<table->topIndex - table->baseIndex + 1; i++){
free(table->subTables[i].table);
}
free(table->subTables);
}

View File

@ -70,59 +70,36 @@ size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size
return byteLength;
}
void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
{
if(intArrayLength > byteArrayLength*8)
{
printf("Error: intArrayLength > byteArrayLength*8\n");
printf("intArrayLength=%zu, byteArrayLength = %zu", intArrayLength, byteArrayLength);
exit(0);
}
if(intArrayLength>0)
*intArray = (unsigned char*)malloc(intArrayLength*sizeof(unsigned char));
else
*intArray = NULL;
size_t n = 0, i;
int tmp;
for (i = 0; i < byteArrayLength-1; i++)
if(stepLength > byteArrayLength*4)
{
tmp = byteArray[i];
(*intArray)[n++] = (tmp & 0x80) >> 7;
(*intArray)[n++] = (tmp & 0x40) >> 6;
(*intArray)[n++] = (tmp & 0x20) >> 5;
(*intArray)[n++] = (tmp & 0x10) >> 4;
(*intArray)[n++] = (tmp & 0x08) >> 3;
(*intArray)[n++] = (tmp & 0x04) >> 2;
(*intArray)[n++] = (tmp & 0x02) >> 1;
(*intArray)[n++] = (tmp & 0x01) >> 0;
printf("Error: stepLength > byteArray.length*4\n");
printf("stepLength=%zu, byteArray.length=%zu\n", stepLength, byteArrayLength);
exit(0);
}
if(stepLength>0)
*intArray = (unsigned char*)malloc(stepLength*sizeof(unsigned char));
else
*intArray = NULL;
size_t i, n = 0;
for (i = 0; i < byteArrayLength; i++) {
unsigned char tmp = byteArray[i];
(*intArray)[n++] = (tmp & 0xC0) >> 6;
if(n==stepLength)
break;
(*intArray)[n++] = (tmp & 0x30) >> 4;
if(n==stepLength)
break;
(*intArray)[n++] = (tmp & 0x0C) >> 2;
if(n==stepLength)
break;
(*intArray)[n++] = tmp & 0x03;
if(n==stepLength)
break;
}
tmp = byteArray[i];
if(n == intArrayLength)
return;
(*intArray)[n++] = (tmp & 0x80) >> 7;
if(n == intArrayLength)
return;
(*intArray)[n++] = (tmp & 0x40) >> 6;
if(n == intArrayLength)
return;
(*intArray)[n++] = (tmp & 0x20) >> 5;
if(n == intArrayLength)
return;
(*intArray)[n++] = (tmp & 0x10) >> 4;
if(n == intArrayLength)
return;
(*intArray)[n++] = (tmp & 0x08) >> 3;
if(n == intArrayLength)
return;
(*intArray)[n++] = (tmp & 0x04) >> 2;
if(n == intArrayLength)
return;
(*intArray)[n++] = (tmp & 0x02) >> 1;
if(n == intArrayLength)
return;
(*intArray)[n++] = (tmp & 0x01) >> 0;
}
/**
@ -174,195 +151,6 @@ size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t tim
return byteLength;
}
size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result)
{
size_t i, j, byteLength = 0;
if(timeStepTypeLength%4==0)
byteLength = timeStepTypeLength*2/8;
else
byteLength = timeStepTypeLength*2/8+1;
size_t n = 0;
for(i = 0;i<byteLength;i++)
{
int tmp = 0;
/*for(j = 0;j<4&&n<timeStepTypeLength;j++)
{
int type = timeStepType[n];
switch(type)
{
case 0:
break;
case 1:
tmp = (tmp | (1 << (6-j*2)));
break;
case 2:
tmp = (tmp | (2 << (6-j*2)));
break;
case 3:
tmp = (tmp | (3 << (6-j*2)));
break;
default:
printf("Error: wrong timestep type...: type[%zu]=%d\n", n, type);
exit(0);
}
n++;
}*/
for(j = 0;j<4&&n<timeStepTypeLength;j++)
{
unsigned char type = timeStepType[n];
tmp = tmp | type << (6-(j<<1));
n++;
}
result[i] = (unsigned char)tmp;
}
return byteLength;
}
void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
{
if(stepLength > byteArrayLength*4)
{
printf("Error: stepLength > byteArray.length*4\n");
printf("stepLength=%zu, byteArray.length=%zu\n", stepLength, byteArrayLength);
exit(0);
}
if(stepLength>0)
*intArray = (unsigned char*)malloc(stepLength*sizeof(unsigned char));
else
*intArray = NULL;
size_t i, n = 0;
for (i = 0; i < byteArrayLength; i++) {
unsigned char tmp = byteArray[i];
(*intArray)[n++] = (tmp & 0xC0) >> 6;
if(n==stepLength)
break;
(*intArray)[n++] = (tmp & 0x30) >> 4;
if(n==stepLength)
break;
(*intArray)[n++] = (tmp & 0x0C) >> 2;
if(n==stepLength)
break;
(*intArray)[n++] = tmp & 0x03;
if(n==stepLength)
break;
}
}
size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result)
{
size_t i = 0, k = 0, byteLength = 0, n = 0;
if(timeStepTypeLength%8==0)
byteLength = timeStepTypeLength*3/8;
else
byteLength = timeStepTypeLength*3/8+1;
if(byteLength>0)
*result = (unsigned char*)malloc(byteLength*sizeof(unsigned char));
else
*result = NULL;
int tmp = 0;
for(n = 0;n<timeStepTypeLength;n++)
{
k = n%8;
switch(k)
{
case 0:
tmp = tmp | (timeStepType[n] << 5);
break;
case 1:
tmp = tmp | (timeStepType[n] << 2);
break;
case 2:
tmp = tmp | (timeStepType[n] >> 1);
(*result)[i++] = (unsigned char)tmp;
tmp = 0 | (timeStepType[n] << 7);
break;
case 3:
tmp = tmp | (timeStepType[n] << 4);
break;
case 4:
tmp = tmp | (timeStepType[n] << 1);
break;
case 5:
tmp = tmp | (timeStepType[n] >> 2);
(*result)[i++] = (unsigned char)tmp;
tmp = 0 | (timeStepType[n] << 6);
break;
case 6:
tmp = tmp | (timeStepType[n] << 3);
break;
case 7:
tmp = tmp | (timeStepType[n] << 0);
(*result)[i++] = (unsigned char)tmp;
tmp = 0;
break;
}
}
if(k!=7) //load the last one
(*result)[i] = (unsigned char)tmp;
return byteLength;
}
void convertByteArray2IntArray_fast_3b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
{
if(stepLength > byteArrayLength*8/3)
{
printf("Error: stepLength > byteArray.length*8/3, impossible case unless bugs elsewhere.\n");
printf("stepLength=%zu, byteArray.length=%zu\n", stepLength, byteArrayLength);
exit(0);
}
if(stepLength>0)
*intArray = (unsigned char*)malloc(stepLength*sizeof(unsigned char));
else
*intArray = NULL;
size_t i = 0, ii = 0, n = 0;
unsigned char tmp = byteArray[i];
for(n=0;n<stepLength;)
{
switch(n%8)
{
case 0:
(*intArray)[n++] = (tmp & 0xE0) >> 5;
break;
case 1:
(*intArray)[n++] = (tmp & 0x1C) >> 2;
break;
case 2:
ii = (tmp & 0x03) << 1;
i++;
tmp = byteArray[i];
ii |= (tmp & 0x80) >> 7;
(*intArray)[n++] = ii;
break;
case 3:
(*intArray)[n++] = (tmp & 0x70) >> 4;
break;
case 4:
(*intArray)[n++] = (tmp & 0x0E) >> 1;
break;
case 5:
ii = (tmp & 0x01) << 2;
i++;
tmp = byteArray[i];
ii |= (tmp & 0xC0) >> 6;
(*intArray)[n++] = ii;
break;
case 6:
(*intArray)[n++] = (tmp & 0x38) >> 3;
break;
case 7:
(*intArray)[n++] = (tmp & 0x07);
i++;
tmp = byteArray[i];
break;
}
}
}
inline int getLeftMovingSteps(size_t k, unsigned char resiBitLength)
{
return 8 - k%8 - resiBitLength;
@ -412,92 +200,4 @@ size_t convertIntArray2ByteArray_fast_dynamic(unsigned char* timeStepType, unsig
size_t size = dba->size;
free_DBA(dba);
return size;
}
/**
*
* @param timeStepType is the resiMidBits
* @param resiBitLength is the length of resiMidBits for each element, (the number of resiBitLength == the # of unpredictable elements
* @return
*/
size_t convertIntArray2ByteArray_fast_dynamic2(unsigned char* timeStepType, unsigned char* resiBitLength, size_t resiBitLengthLength, unsigned char **bytes)
{
size_t i = 0, j = 0, k = 0;
int value;
DynamicByteArray* dba;
new_DBA(&dba, 1024);
int tmp = 0, leftMovSteps = 0;
for(j = 0;j<resiBitLengthLength;j++)
{
unsigned char rbl = resiBitLength[j];
if(rbl==0)
continue;
value = timeStepType[i];
leftMovSteps = getLeftMovingSteps(k, rbl);
if(leftMovSteps < 0)
{
tmp = tmp | (value >> (-leftMovSteps));
addDBA_Data(dba, (unsigned char)tmp);
tmp = 0 | (value << (8+leftMovSteps));
}
else if(leftMovSteps > 0)
{
tmp = tmp | (value << leftMovSteps);
}
else //==0
{
tmp = tmp | value;
addDBA_Data(dba, (unsigned char)tmp);
tmp = 0;
}
i++;
k += rbl;
}
if(leftMovSteps != 0)
addDBA_Data(dba, (unsigned char)tmp);
convertDBAtoBytes(dba, bytes);
size_t size = dba->size;
free_DBA(dba);
return size;
}
int computeBitNumRequired(size_t dataLength)
{
if(exe_params->SZ_SIZE_TYPE==4)
return 32 - numberOfLeadingZeros_Int(dataLength);
else
return 64 - numberOfLeadingZeros_Long(dataLength);
}
void decompressBitArraybySimpleLZ77(int** result, unsigned char* bytes, size_t bytesLength, size_t totalLength, int validLength)
{
size_t pairLength = (bytesLength*8)/(validLength+1);
size_t tmpLength = pairLength*2;
int tmpResult[tmpLength];
size_t i, j, k = 0;
for(i = 0;i<tmpLength;i+=2)
{
size_t outIndex = k/8;
int innerIndex = k%8;
unsigned char curByte = bytes[outIndex];
tmpResult[i] = (curByte >> (8-1-innerIndex)) & 0x01;
k++;
int numResult = extractBytes(bytes, k, validLength);
tmpResult[i+1] = numResult;
k = k + validLength;
}
*result = (int*)malloc(sizeof(int)*totalLength);
k = 0;
for(i = 0;i<tmpLength;i=i+2)
{
int state = tmpResult[i];
size_t num = tmpResult[i+1];
for(j = 0;j<num;j++)
(*result)[k++] = state;
}
}
}

View File

@ -1,254 +0,0 @@
/**
* @file Variable.c
* @author Sheng Di
* @date July, 2016
* @brief TypeManager is used to manage the type array: parsing of the bytes and other types in between.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "VarSet.h"
#include "sz.h"
void free_Variable_keepOriginalData(SZ_Variable* v)
{
if(v->varName!=NULL)
free(v->varName);
if(v->compressedBytes!=NULL)
free(v->compressedBytes);
if(v->multisteps!=NULL)
free_multisteps(v->multisteps);
free(v);
}
/**
*
* @deprecated
* */
void free_Variable_keepCompressedBytes(SZ_Variable* v)
{
if(v->varName!=NULL)
free(v->varName);
if(v->data!=NULL)
free(v->data);
if(v->multisteps!=NULL)
free_multisteps(v->multisteps);
free(v);
}
void free_Variable_all(SZ_Variable* v)
{
if(v->varName!=NULL)
free(v->varName);
if(v->data!=NULL)
free(v->data);
if(v->compressedBytes!=NULL)
free(v->compressedBytes);
if(v->multisteps!=NULL)
free_multisteps(v->multisteps);
free(v);
}
void SZ_batchAddVar(int var_id, char* varName, int dataType, void* data,
int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio,
size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
{
if(sz_varset==NULL)
{
sz_varset = (SZ_VarSet*)malloc(sizeof(SZ_VarSet));
sz_varset->header = (SZ_Variable*)malloc(sizeof(SZ_Variable));
sz_varset->header->next = NULL;
sz_varset->lastVar = sz_varset->header;
sz_varset->count = 0;
}
SZ_Variable* var = (SZ_Variable*)malloc(sizeof(SZ_Variable));
memset(var, 0, sizeof(SZ_Variable));
var->var_id = var_id;
var->varName = (char*)malloc(strlen(varName)+1);
memcpy(var->varName, varName, strlen(varName)+1);
//var->varName = varName;
var->dataType = dataType;
var->r5 = r5;
var->r4 = r4;
var->r3 = r3;
var->r2 = r2;
var->r1 = r1;
var->errBoundMode = errBoundMode;
var->absErrBound = absErrBound;
var->relBoundRatio = relBoundRatio;
var->pwRelBoundRatio = pwRelBoundRatio;
var->data = data;
var->multisteps = (sz_multisteps*)malloc(sizeof(sz_multisteps));
memset(var->multisteps, 0, sizeof(sz_multisteps));
size_t dataLen = computeDataLength(r5, r4, r3, r2, r1);
if(dataType==SZ_FLOAT)
{
var->multisteps->hist_data = (float*)malloc(sizeof(float)*dataLen);
memset(var->multisteps->hist_data, 0, sizeof(float)*dataLen);
}
else if(dataType==SZ_DOUBLE)
{
var->multisteps->hist_data = (double*)malloc(sizeof(double)*dataLen);
memset(var->multisteps->hist_data, 0, sizeof(double)*dataLen);
}
var->compressedBytes = NULL;
var->next = NULL;
sz_varset->count ++;
sz_varset->lastVar->next = var;
sz_varset->lastVar = var;
}
int SZ_batchDelVar_ID(int var_id)
{
int state = SZ_batchDelVar_ID_vset(sz_varset, var_id);
return state;
}
int SZ_batchDelVar(char* varName)
{
int state = SZ_batchDelVar_vset(sz_varset, varName);
return state;
}
int SZ_batchDelVar_ID_vset(SZ_VarSet* vset, int var_id)
{
int delSuccess = SZ_FAILED;
SZ_Variable* p = vset->header;
SZ_Variable* q = p->next;
while(q != NULL)
{
if(q->var_id == var_id)
{
p->next = q->next;
//free_Variable_all(q);
free_Variable_keepOriginalData(q);
vset->count --;
delSuccess = SZ_SUCCESS;
if(q->next==NULL) //means that q is the last variable
vset->lastVar = p;
break;
}
p = p->next;
q = q->next;
}
return delSuccess;
}
int SZ_batchDelVar_vset(SZ_VarSet* vset, char* varName)
{
int delSuccess = SZ_FAILED;
SZ_Variable* p = vset->header;
SZ_Variable* q = p->next;
while(q != NULL)
{
int cmpResult = strcmp(q->varName, varName);
if(cmpResult==0)
{
p->next = q->next;
//free_Variable_all(q);
free_Variable_keepOriginalData(q);
vset->count --;
delSuccess = SZ_SUCCESS;
break;
}
p = p->next;
q = q->next;
}
return delSuccess;
}
SZ_Variable* SZ_searchVar(char* varName)
{
SZ_Variable* p = sz_varset->header->next;
while(p!=NULL)
{
int checkName = strcmp(p->varName, varName);
if(checkName==0)
return p;
p = p->next;
}
return NULL;
}
void* SZ_getVarData(char* varName, size_t *r5, size_t *r4, size_t *r3, size_t *r2, size_t *r1)
{
SZ_Variable* v = SZ_searchVar(varName);
*r5 = v->r5;
*r4 = v->r4;
*r3 = v->r3;
*r2 = v->r2;
*r1 = v->r1;
return (void*)v->data;
}
/**
*
* int mode: SZ_MAINTAIN_VAR_DATA, Z_DESTROY_WHOLE_VARSET
* */
void SZ_freeVarSet(int mode)
{
free_VarSet_vset(sz_varset, mode);
}
//free_VarSet will completely destroy the SZ_VarSet, so don't do it until you really don't need it any more!
/**
*
* int mode: SZ_MAINTAIN_VAR_DATA, Z_DESTROY_WHOLE_VARSET
* */
void free_VarSet_vset(SZ_VarSet *vset, int mode)
{
if(vset==NULL)
return;
SZ_Variable *p = vset->header;
while(p->next!=NULL)
{
SZ_Variable *q = p->next;
p->next = q->next;
if(mode==SZ_MAINTAIN_VAR_DATA)
free_Variable_keepOriginalData(q);
else if(mode==SZ_DESTROY_WHOLE_VARSET)
free_Variable_all(q);
}
free(sz_varset->header);
free(vset);
}
void free_multisteps(sz_multisteps* multisteps)
{
if(multisteps->hist_data!=NULL)
free(multisteps->hist_data);
free(multisteps);
}
inline int checkVarID(unsigned char cur_var_id, unsigned char* var_ids, int var_count)
{
int j = 0;
for(j=0;j<var_count;j++)
{
if(var_ids[j]==cur_var_id)
return 1;
}
return 0;
}
SZ_Variable* SZ_getVariable(int var_id)
{
SZ_Variable* p = sz_varset->header->next;
while(p!=NULL)
{
if(var_id == p->var_id)
return p;
p = p->next;
}
return NULL;
}

38
deps/SZ/sz/src/conf.c vendored
View File

@ -375,15 +375,7 @@ int SZ_ReadConf(const char* sz_cfgFile) {
//initialization for Huffman encoding
//SZ_Reset();
}
else if(confparams_cpr->sol_ID == PASTRI)
{//load parameters for PSTRI
pastri_par.bf[0] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_0", 0);
pastri_par.bf[1] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_1", 0);
pastri_par.bf[2] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_2", 0);
pastri_par.bf[3] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_3", 0);
pastri_par.numBlocks = (int)iniparser_getint(ini, "PARAMETER:numBlocks", 0);
confparams_cpr->absErrBound = pastri_par.originalEb = (double)iniparser_getdouble(ini, "PARAMETER:absErrBound", 1E-3);
}
iniparser_freedict(ini);
return SZ_SUCCESS;
@ -408,31 +400,3 @@ int SZ_LoadConf(const char* sz_cfgFile) {
}
return SZ_SUCCESS;
}
int checkVersion(unsigned char version)
{
return version <= versionNumber;
}
void initSZ_TSC()
{
sz_tsc = (sz_tsc_metadata*)malloc(sizeof(sz_tsc_metadata));
memset(sz_tsc, 0, sizeof(sz_tsc_metadata));
/*sprintf(sz_tsc->metadata_filename, "sz_tsc_metainfo.txt");
sz_tsc->metadata_file = fopen(sz_tsc->metadata_filename, "wb");
if (sz_tsc->metadata_file == NULL)
{
printf("Failed to open sz_tsc_metainfo.txt file for writing metainfo.\n");
exit(1);
}
fputs("#metadata of the time-step based compression\n", sz_tsc->metadata_file); */
}
/*double fabs(double value)
{
if(value<0)
return -value;
else
return value;
}*/

View File

@ -1,87 +0,0 @@
#include "pastri.h"
#include "pastriD.h"
#include "pastriF.h"
void SZ_pastriReadParameters(char paramsFilename[512],pastri_params *paramsPtr){
FILE *paramsF;
paramsF=fopen(paramsFilename,"r");
if(paramsF==NULL){
printf("ERROR: Parameters file cannot be opened.\n");
printf("Filename: %s\n",paramsFilename);
assert(0);
}
fscanf(paramsF,"%d %d %d %d %lf %d %d",&paramsPtr->bf[0],&paramsPtr->bf[1],&paramsPtr->bf[2],&paramsPtr->bf[3],&paramsPtr->originalEb,&paramsPtr->dataSize,&paramsPtr->numBlocks);
//printf("Params: %d %d %d %d %.3e %d\n",paramsPtr->bf[0],paramsPtr->bf[1],paramsPtr->bf[2],paramsPtr->bf[3],paramsPtr->originalEb,paramsPtr->numBlocks);
fclose(paramsF);
}
void SZ_pastriPreprocessParameters(pastri_params *p){
//Preprocess by calculating some pastri_params:
//Calculate sbSize, sbNum, etc.:
p->idxRange[0]=(p->bf[0]+1)*(p->bf[0]+2)/2;
p->idxRange[1]=(p->bf[1]+1)*(p->bf[1]+2)/2;
p->idxRange[2]=(p->bf[2]+1)*(p->bf[2]+2)/2;
p->idxRange[3]=(p->bf[3]+1)*(p->bf[3]+2)/2;
p->sbSize=p->idxRange[2]*p->idxRange[3];
p->sbNum=p->idxRange[0]*p->idxRange[1];
p->bSize=p->sbSize*p->sbNum;
p->usedEb=p->originalEb*0.999; //This is needed just to eliminate some rounding errors. It has almost no effect on compression rate/ratios.
}
void SZ_pastriCompressBatch(pastri_params *p,unsigned char *originalBuf, unsigned char** compressedBufP,size_t *compressedBytes){
(*compressedBufP) = (unsigned char*)calloc(p->numBlocks*p->bSize*p->dataSize,sizeof(char));
int bytes; //bytes for this block
int i;
size_t bytePos=0; //Current byte pos in the outBuf
memcpy(*compressedBufP, p, sizeof(pastri_params));
bytePos+=sizeof(pastri_params);
for(i=0;i<p->numBlocks;i++){
if(p->dataSize==8){
pastri_double_Compress(originalBuf + (i*p->bSize*p->dataSize),p,(*compressedBufP) + bytePos,&bytes);
}else if(p->dataSize==4){
pastri_float_Compress(originalBuf + (i*p->bSize*p->dataSize),p,(*compressedBufP) + bytePos,&bytes);
}
bytePos+=bytes;
//printf("bytes:%d\n",bytes);
}
*compressedBytes=bytePos;
//printf("totalBytesWritten:%d\n",*compressedBytes);
}
void SZ_pastriDecompressBatch(unsigned char*compressedBuf, pastri_params *p, unsigned char** decompressedBufP ,size_t *decompressedBytes){
int bytePos=0; //Current byte pos in the outBuf
memcpy(p, compressedBuf, sizeof(pastri_params));
bytePos+=sizeof(pastri_params);
(*decompressedBufP) = (unsigned char*)malloc(p->numBlocks*p->bSize*p->dataSize*sizeof(char));
int bytes; //bytes for this block
int i;
for(i=0;i<p->numBlocks;i++){
if(p->dataSize==8){
pastri_double_Decompress(compressedBuf + bytePos,p->dataSize,p,(*decompressedBufP) + (i*p->bSize*p->dataSize),&bytes);
}else if(p->dataSize==4){
pastri_float_Decompress(compressedBuf + bytePos,p->dataSize,p,(*decompressedBufP) + (i*p->bSize*p->dataSize),&bytes);
}
bytePos += bytes;
//printf("bytes:%d\n",bytes);
}
//printf("totalBytesRead:%d\n",bytePos);
*decompressedBytes=p->numBlocks*p->bSize*p->dataSize;
}
void SZ_pastriCheckBatch(pastri_params *p,unsigned char*originalBuf,unsigned char*decompressedBuf){
int i;
for(i=0;i<p->numBlocks;i++){
if(p->dataSize==8){
pastri_double_Check(originalBuf+(i*p->bSize*p->dataSize),p->dataSize,decompressedBuf+(i*p->bSize*p->dataSize),p);
}else if(p->dataSize==4){
pastri_float_Check(originalBuf+(i*p->bSize*p->dataSize),p->dataSize,decompressedBuf+(i*p->bSize*p->dataSize),p);
}
}
}

1006
deps/SZ/sz/src/rw.c vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,205 +0,0 @@
! @file sdc_interface.F90
! @author Sheng Di (disheng222@gmail.com)
! @date Aug., 2014
! @ Mathematics and Computer Science (MCS)
! @ Argonne National Laboratory, Lemont, USA.
! @brief The key Fortran binding file to connect C language and Fortran (Fortran part)
MODULE RW
use :: ISO_C_BINDING
INTERFACE writeData
MODULE PROCEDURE WriteData_inBinary_d1_INTEGER_K1
MODULE PROCEDURE WriteData_inBinary_d1_REAL_K4
MODULE PROCEDURE WriteData_inBinary_d2_REAL_K4
MODULE PROCEDURE WriteData_inBinary_d3_REAL_K4
MODULE PROCEDURE WriteData_inBinary_d4_REAL_K4
MODULE PROCEDURE WriteData_inBinary_d5_REAL_K4
MODULE PROCEDURE WriteData_inBinary_d1_REAL_K8
MODULE PROCEDURE WriteData_inBinary_d2_REAL_K8
MODULE PROCEDURE WriteData_inBinary_d3_REAL_K8
MODULE PROCEDURE WriteData_inBinary_d4_REAL_K8
MODULE PROCEDURE WriteData_inBinary_d5_REAL_K8
END INTERFACE writeData
INTERFACE readData
MODULE PROCEDURE readByteData
MODULE PROCEDURE readFloatData
MODULE PROCEDURE readDoubleData
END INTERFACE readData
CONTAINS
!Bytes here could be an "allocatable" array, so it requires an extra "byteLength" io indicate the length (can't use size(Bytes))
SUBROUTINE WriteData_inBinary_d1_INTEGER_K1(Bytes, byteLength, FILE_PATH)
implicit none
INTEGER(KIND=1), DIMENSION(:) :: Bytes
CHARACTER(LEN=*) :: FILE_PATH
INTEGER(KIND=C_SIZE_T) :: byteLength
CALL writeByteFile(Bytes, byteLength, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d1_INTEGER_K1
SUBROUTINE WriteData_inBinary_d1_REAL_K4(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=4), DIMENSION(:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeFloatFile(VAR, nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d1_REAL_K4
SUBROUTINE WriteData_inBinary_d2_REAL_K4(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=4), DIMENSION(:,:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeFloatFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d2_REAL_K4
SUBROUTINE WriteData_inBinary_d3_REAL_K4(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=4), DIMENSION(:,:,:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeFloatFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d3_REAL_K4
SUBROUTINE WriteData_inBinary_d4_REAL_K4(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=4), DIMENSION(:,:,:,:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeFloatFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d4_REAL_K4
SUBROUTINE WriteData_inBinary_d5_REAL_K4(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=4), DIMENSION(:,:,:,:,:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeFloatFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d5_REAL_K4
!write data in binary for K8 data
SUBROUTINE WriteData_inBinary_d1_REAL_K8(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=8), DIMENSION(:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeDoubleFile(VAR, nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d1_REAL_K8
SUBROUTINE WriteData_inBinary_d2_REAL_K8(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=8), DIMENSION(:,:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeDoubleFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d2_REAL_K8
SUBROUTINE WriteData_inBinary_d3_REAL_K8(VAR, FILE_PATH)
implicit none
REAL(KIND=8), DIMENSION(:,:,:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeDoubleFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d3_REAL_K8
SUBROUTINE WriteData_inBinary_d4_REAL_K8(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=8), DIMENSION(:,:,:,:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeDoubleFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d4_REAL_K8
SUBROUTINE WriteData_inBinary_d5_REAL_K8(VAR, nbEle, FILE_PATH)
implicit none
REAL(KIND=8), DIMENSION(:,:,:,:,:) :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER :: nbEle
CALL writeDoubleFile(RESHAPE(VAR,(/nbEle/)), nbEle, FILE_PATH, len(trim(FILE_PATH)))
END SUBROUTINE WriteData_inBinary_d5_REAL_K8
!Check file size
SUBROUTINE checkFileSize(FILE_PATH, BYTESIZE)
implicit none
CHARACTER(LEN=*) :: FILE_PATH
INTEGER(kind=C_SIZE_T) :: BYTESIZE
CALL checkFileSizeC(FILE_PATH, len(trim(FILE_PATH)), BYTESIZE)
END SUBROUTINE checkFileSize
!Read data
SUBROUTINE readByteData(FILE_PATH, Bytes, outSize)
implicit none
INTEGER(KIND=1), DIMENSION(:), allocatable :: temp
INTEGER(KIND=1), DIMENSION(:), allocatable :: Bytes
CHARACTER(LEN=*) :: FILE_PATH
INTEGER(kind=C_SIZE_T) :: COUNTER
INTEGER(kind=C_SIZE_T), intent(out) :: outSize !in bytes
CALL checkFileSize(FILE_PATH, outSize)
allocate(temp(outSize))
CALL readByteFile(FILE_PATH, len(trim(FILE_PATH)), temp, outSize)
allocate(Bytes(outSize))
DO COUNTER=1,outSize,1
Bytes(COUNTER) = temp(COUNTER)
END DO
deallocate(temp)
END SUBROUTINE readByteData
SUBROUTINE readFloatData(FILE_PATH, VAR, nbEle)
implicit none
REAL(KIND=4), DIMENSION(:), allocatable :: temp
REAL(KIND=4), DIMENSION(:), allocatable :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER(kind=C_SIZE_T) :: COUNTER, fileSize
INTEGER(kind=C_SIZE_T), intent(out) :: nbEle
CALL checkFileSize(FILE_PATH, fileSize)
nbEle = fileSize/4
allocate(temp(nbEle))
CALL readFloatFile(FILE_PATH, len(trim(FILE_PATH)), temp, nbEle)
allocate(VAR(nbEle))
DO COUNTER=1,fileSize,1
VAR(COUNTER) = temp(COUNTER)
END DO
deallocate(temp)
END SUBROUTINE readFloatData
SUBROUTINE readDoubleData(FILE_PATH, VAR, nbEle)
implicit none
REAL(KIND=8), DIMENSION(:), allocatable :: temp
REAL(KIND=8), DIMENSION(:), allocatable :: VAR
CHARACTER(LEN=*) :: FILE_PATH
INTEGER(kind=C_SIZE_T) :: COUNTER, fileSize
INTEGER(kind=C_SIZE_T), intent(out) :: nbEle
CALL checkFileSize(FILE_PATH, fileSize)
nbEle = fileSize/8
allocate(temp(nbEle))
CALL readDoubleFile(FILE_PATH, len(trim(FILE_PATH)), temp, nbEle)
allocate(VAR(nbEle))
DO COUNTER=1,fileSize,1
VAR(COUNTER) = temp(COUNTER)
END DO
deallocate(temp)
END SUBROUTINE readDoubleData
END MODULE RW

96
deps/SZ/sz/src/rwf.c vendored
View File

@ -1,96 +0,0 @@
/**
* @file rw.c
* @author Sheng Di
* @date April, 2015
* @brief io interface for fortrance
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "rw.h"
void checkfilesizec_(char *srcFilePath, int *len, size_t *filesize)
{
int i;
int status;
char s[*len+1];
for(i=0;i<*len;i++)
s[i]=srcFilePath[i];
s[*len]='\0';
*filesize = checkFileSize(s, &status);
}
void readbytefile_(char *srcFilePath, int *len, unsigned char *bytes, size_t *byteLength)
{
size_t i;
int ierr;
char s[*len+1];
for(i=0;i<*len;i++)
s[i]=srcFilePath[i];
s[*len]='\0';
unsigned char *tmp_bytes = readByteData(s, byteLength, &ierr);
memcpy(bytes, tmp_bytes, *byteLength);
free(tmp_bytes);
}
void readdoublefile_(char *srcFilePath, int *len, double *data, size_t *nbEle)
{
size_t i;
int ierr;
char s[*len+1];
for(i=0;i<*len;i++)
s[i]=srcFilePath[i];
s[*len]='\0';
double *tmp_data = readDoubleData(s, nbEle, &ierr);
memcpy(data, tmp_data, *nbEle);
free(tmp_data);
}
void readfloatfile_(char *srcFilePath, int *len, float *data, size_t *nbEle)
{
size_t i;
int ierr;
char s[*len+1];
for(i=0;i<*len;i++)
s[i]=srcFilePath[i];
s[*len]='\0';
float *tmp_data = readFloatData(s, nbEle, &ierr);
memcpy(data, tmp_data, *nbEle);
free(tmp_data);
}
void writebytefile_(unsigned char *bytes, size_t *byteLength, char *tgtFilePath, int *len)
{
size_t i;
int ierr;
char s[*len+1];
for(i=0;i<*len;i++)
s[i]=tgtFilePath[i];
s[*len]='\0';
writeByteData(bytes, *byteLength, s, &ierr);
}
void writedoublefile_(double *data, size_t *nbEle, char *tgtFilePath, int *len)
{
size_t i;
int ierr;
char s[*len+1];
for(i=0;i<*len;i++)
s[i]=tgtFilePath[i];
s[*len]='\0';
writeDoubleData(data, *nbEle, s, &ierr);
}
void writefloatfile_(float *data, size_t *nbEle, char *tgtFilePath, int *len)
{
size_t i;
int ierr;
char s[*len+1];
for(i=0;i<*len;i++)
s[i]=tgtFilePath[i];
s[*len]='\0';
writeFloatData(data, *nbEle, s, &ierr);
}

15
deps/SZ/sz/src/sz.c vendored
View File

@ -18,7 +18,6 @@
#include "TightDataPointStorageD.h"
#include "TightDataPointStorageF.h"
#include "zlib.h"
#include "rw.h"
#include "conf.h"
#include "utility.h"
@ -36,16 +35,6 @@ sz_params *confparams_dec = NULL; //used for decompression
sz_exedata *exe_params = NULL;
/*following global variables are desgined for time-series based compression*/
/*sz_varset is not used in the single-snapshot data compression*/
SZ_VarSet* sz_varset = NULL;
sz_multisteps *multisteps = NULL;
sz_tsc_metadata *sz_tsc = NULL;
//only for Pastri compressor
#ifdef PASTRI
pastri_params pastri_par;
#endif
@ -64,10 +53,6 @@ int SZ_Init(const char *configFilePath)
return SZ_FAILED;
exe_params->SZ_SIZE_TYPE = SZ_SIZE_TYPE_DEFUALT;
if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
{
initSZ_TSC();
}
return SZ_SUCCESS;
}

View File

@ -21,10 +21,7 @@
#include "sz_double.h"
#include "szd_double.h"
#include "zlib.h"
#include "rw.h"
#include "utility.h"
#include "CacheTable.h"
#include "MultiLevelCacheTableWideInterval.h"
#include "sz_stats.h"
unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize)

View File

@ -21,22 +21,11 @@
#include "sz_float.h"
#include "szd_float.h"
#include "zlib.h"
#include "rw.h"
#include "utility.h"
#include "CacheTable.h"
#include "MultiLevelCacheTableWideInterval.h"
#include "sz_stats.h"
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize)
{
*outSize = dataLength*sizeof(float);
unsigned char* out = (unsigned char*)malloc(dataLength*sizeof(float));
memcpy(out, data, dataLength*sizeof(float));
return out;
}
void computeReqLength_float(double realPrecision, short rangeExpo, int* reqLength, float* medianValue)
{

File diff suppressed because it is too large Load Diff

View File

@ -1,60 +0,0 @@
#include <sz_stats.h>
sz_stats sz_stat;
void writeBlockInfo(int use_mean, size_t blockSize, size_t regressionBlocks, size_t totalBlocks)
{
sz_stat.use_mean = use_mean;
sz_stat.blockSize = blockSize;
sz_stat.lorenzoBlocks = totalBlocks - regressionBlocks;
sz_stat.regressionBlocks = regressionBlocks;
sz_stat.totalBlocks = totalBlocks;
sz_stat.lorenzoPercent = 1.0f*sz_stat.lorenzoBlocks/(float)totalBlocks;
sz_stat.regressionPercent = 1.0f*regressionBlocks/(float)totalBlocks;
}
void writeHuffmanInfo(size_t huffmanTreeSize, size_t huffmanCodingSize, size_t totalDataSize, int huffmanNodeCount)
{
sz_stat.huffmanTreeSize = huffmanTreeSize;
sz_stat.huffmanCodingSize = huffmanCodingSize;
sz_stat.huffmanCompressionRatio = 1.0f*totalDataSize/(huffmanTreeSize+huffmanCodingSize);
sz_stat.huffmanNodeCount = huffmanNodeCount;
}
void writeZstdCompressionRatio(float zstdCompressionRatio)
{
sz_stat.zstdCompressionRatio = zstdCompressionRatio;
}
void writeUnpredictDataCounts(size_t unpredictCount, size_t totalNumElements)
{
sz_stat.unpredictCount = unpredictCount;
sz_stat.unpredictPercent = 1.0f*unpredictCount/totalNumElements;
}
void printSZStats()
{
printf("===============stats about sz================\n");
if(sz_stat.use_mean)
printf("use_mean: YES\n");
else
printf("use_mean: NO\n");
printf("blockSize %zu\n", sz_stat.blockSize);
printf("lorenzoPercent %f\n", sz_stat.lorenzoPercent);
printf("regressionPercent %f\n", sz_stat.regressionPercent);
printf("lorenzoBlocks %zu\n", sz_stat.lorenzoBlocks);
printf("regressionBlocks %zu\n", sz_stat.regressionBlocks);
printf("totalBlocks %zu\n", sz_stat.totalBlocks);
printf("huffmanTreeSize %zu\n", sz_stat.huffmanTreeSize);
printf("huffmanCodingSize %zu\n", sz_stat.huffmanCodingSize);
printf("huffmanCompressionRatio %f\n", sz_stat.huffmanCompressionRatio);
printf("huffmanNodeCount %d\n", sz_stat.huffmanNodeCount);
//printf("zstdCompressionRatio %f\n", sz_stat.zstdCompressionRatio);
printf("unpredictCount %zu\n", sz_stat.unpredictCount);
printf("unpredictPercent %f\n", sz_stat.unpredictPercent);
}