homework-jianmu/2.0/importSampleData/app/main.go

1022 lines
30 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bufio"
"bytes"
"database/sql"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
dataImport "github.com/taosdata/TDengine/importSampleData/import"
_ "github.com/taosdata/driver-go/taosSql"
)
const (
// 主键类型必须为 timestamp
TIMESTAMP = "timestamp"
// 样例数据中主键时间字段是 millisecond 还是 dateTime 格式
DATETIME = "datetime"
MILLISECOND = "millisecond"
DefaultStartTime int64 = -1
DefaultInterval int64 = 1 * 1000 // 导入的记录时间间隔,该设置只会在指定 auto=1 之后生效,否则会根据样例数据自动计算间隔时间。单位为毫秒,默认 1000。
DefaultDelay int64 = -1 //
// 当 save 为 1 时保存统计信息的表名, 默认 statistic。
DefaultStatisticTable = "statistic"
// 样例数据文件格式,可以是 json 或 csv
JsonFormat = "json"
CsvFormat = "csv"
SuperTablePrefix = "s_" // 超级表前缀
SubTablePrefix = "t_" // 子表前缀
DriverName = "taosSql"
StartTimeLayout = "2006-01-02 15:04:05.000"
InsertPrefix = "insert into "
)
var (
cfg string // 导入配置文件路径,包含样例数据文件相关描述及对应 TDengine 配置信息。默认使用 config/cfg.toml
cases string // 需要导入的场景名称,该名称可从 -cfg 指定的配置文件中 [usecase] 查看可同时导入多个场景中间使用逗号分隔sensor_info,camera_detection默认为 sensor_info
hnum int // 需要将样例数据进行横向扩展的倍数,假设原有样例数据包含 1 张子表 t_0 数据,指定 hnum 为 2 时会根据原有表名创建 t、t_1 两张子表。默认为 100。
vnum int // 需要将样例数据进行纵向扩展的次数,如果设置为 0 代表将历史数据导入至当前时间后持续按照指定间隔导入。默认为 1000表示将样例数据在时间轴上纵向复制1000 次
thread int // 执行导入数据的线程数目,默认为 10
batch int // 执行导入数据时的批量大小,默认为 100。批量是指一次写操作时包含多少条记录
auto int // 是否自动生成样例数据中的主键时间戳1 是0 否, 默认 0
startTimeStr string // 导入的记录开始时间,格式为 "yyyy-MM-dd HH:mm:ss.SSS",不设置会使用样例数据中最小时间,设置后会忽略样例数据中的主键时间,会按照指定的 start 进行导入。如果 auto 为 1则必须设置 start默认为空
interval int64 // 导入的记录时间间隔,该设置只会在指定 auto=1 之后生效,否则会根据样例数据自动计算间隔时间。单位为毫秒,默认 1000
host string // 导入的 TDengine 服务器 IP默认为 127.0.0.1
port int // 导入的 TDengine 服务器端口,默认为 6030
user string // 导入的 TDengine 用户名,默认为 root
password string // 导入的 TDengine 用户密码,默认为 taosdata
dropdb int // 导入数据之前是否删除数据库1 是0 否, 默认 0
db string // 导入的 TDengine 数据库名称,默认为 test_yyyyMMdd
dbparam string // 当指定的数据库不存在时,自动创建数据库时可选项配置参数,如 days 10 cache 16000 ablocks 4默认为空
dataSourceName string
startTime int64
superTableConfigMap = make(map[string]*superTableConfig)
subTableMap = make(map[string]*dataRows)
scaleTableNames []string
scaleTableMap = make(map[string]*scaleTableInfo)
successRows []int64
lastStaticTime time.Time
lastTotalRows int64
timeTicker *time.Ticker
delay int64 // 当 vnum 设置为 0 时持续导入的时间间隔,默认为所有场景中最小记录间隔时间的一半,单位 ms。
tick int64 // 打印统计信息的时间间隔,默认 2000 ms。
save int // 是否保存统计信息到 tdengine 的 statistic 表中1 是0 否, 默认 0。
saveTable string // 当 save 为 1 时保存统计信息的表名, 默认 statistic。
)
type superTableConfig struct {
startTime int64
endTime int64
cycleTime int64
avgInterval int64
config dataImport.CaseConfig
}
type scaleTableInfo struct {
scaleTableName string
subTableName string
insertRows int64
}
//type tableRows struct {
// tableName string // tableName
// value string // values(...)
//}
type dataRows struct {
rows []map[string]interface{}
config dataImport.CaseConfig
}
func (rows dataRows) Len() int {
return len(rows.rows)
}
func (rows dataRows) Less(i, j int) bool {
iTime := getPrimaryKey(rows.rows[i][rows.config.Timestamp])
jTime := getPrimaryKey(rows.rows[j][rows.config.Timestamp])
return iTime < jTime
}
func (rows dataRows) Swap(i, j int) {
rows.rows[i], rows.rows[j] = rows.rows[j], rows.rows[i]
}
func getPrimaryKey(value interface{}) int64 {
val, _ := value.(int64)
//time, _ := strconv.ParseInt(str, 10, 64)
return val
}
func init() {
parseArg() // parse argument
if db == "" {
// 导入的 TDengine 数据库名称,默认为 test_yyyyMMdd
db = fmt.Sprintf("test_%s", time.Now().Format("20060102"))
}
if auto == 1 && len(startTimeStr) == 0 {
log.Fatalf("startTime must be set when auto is 1, the format is \"yyyy-MM-dd HH:mm:ss.SSS\" ")
}
if len(startTimeStr) != 0 {
t, err := time.ParseInLocation(StartTimeLayout, strings.TrimSpace(startTimeStr), time.Local)
if err != nil {
log.Fatalf("param startTime %s error, %s\n", startTimeStr, err)
}
startTime = t.UnixNano() / 1e6 // as millisecond
} else {
startTime = DefaultStartTime
}
dataSourceName = fmt.Sprintf("%s:%s@/tcp(%s:%d)/", user, password, host, port)
printArg()
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
}
func main() {
importConfig := dataImport.LoadConfig(cfg)
var caseMinInterval int64 = -1
for _, userCase := range strings.Split(cases, ",") {
caseConfig, ok := importConfig.UserCases[userCase]
if !ok {
log.Println("not exist case: ", userCase)
continue
}
checkUserCaseConfig(userCase, &caseConfig)
// read file as map array
fileRows := readFile(caseConfig)
log.Printf("case [%s] sample data file contains %d rows.\n", userCase, len(fileRows.rows))
if len(fileRows.rows) == 0 {
log.Printf("there is no valid line in file %s\n", caseConfig.FilePath)
continue
}
_, exists := superTableConfigMap[caseConfig.StName]
if !exists {
superTableConfigMap[caseConfig.StName] = &superTableConfig{config: caseConfig}
} else {
log.Fatalf("the stname of case %s already exist.\n", caseConfig.StName)
}
var start, cycleTime, avgInterval int64 = getSuperTableTimeConfig(fileRows)
// set super table's startTime, cycleTime and avgInterval
superTableConfigMap[caseConfig.StName].startTime = start
superTableConfigMap[caseConfig.StName].cycleTime = cycleTime
superTableConfigMap[caseConfig.StName].avgInterval = avgInterval
if caseMinInterval == -1 || caseMinInterval > avgInterval {
caseMinInterval = avgInterval
}
startStr := time.Unix(0, start*int64(time.Millisecond)).Format(StartTimeLayout)
log.Printf("case [%s] startTime %s(%d), average dataInterval %d ms, cycleTime %d ms.\n", userCase, startStr, start, avgInterval, cycleTime)
}
if DefaultDelay == delay {
// default delay
delay = caseMinInterval / 2
if delay < 1 {
delay = 1
}
log.Printf("actual delay is %d ms.", delay)
}
superTableNum := len(superTableConfigMap)
if superTableNum == 0 {
log.Fatalln("no valid file, exited")
}
start := time.Now()
// create super table
createSuperTable(superTableConfigMap)
log.Printf("create %d superTable ,used %d ms.\n", superTableNum, time.Since(start)/1e6)
// create sub table
start = time.Now()
createSubTable(subTableMap)
log.Printf("create %d times of %d subtable ,all %d tables, used %d ms.\n", hnum, len(subTableMap), len(scaleTableMap), time.Since(start)/1e6)
subTableNum := len(scaleTableMap)
if subTableNum < thread {
thread = subTableNum
}
filePerThread := subTableNum / thread
leftFileNum := subTableNum % thread
var wg sync.WaitGroup
start = time.Now()
successRows = make([]int64, thread)
startIndex, endIndex := 0, filePerThread
for i := 0; i < thread; i++ {
// start thread
if i < leftFileNum {
endIndex++
}
wg.Add(1)
go insertData(i, startIndex, endIndex, &wg, successRows)
startIndex, endIndex = endIndex, endIndex+filePerThread
}
lastStaticTime = time.Now()
timeTicker = time.NewTicker(time.Millisecond * time.Duration(tick))
go staticSpeed()
wg.Wait()
usedTime := time.Since(start)
total := getTotalRows(successRows)
log.Printf("finished insert %d rows, used %d ms, speed %d rows/s", total, usedTime/1e6, total*1e3/usedTime.Milliseconds())
if vnum == 0 {
// continue waiting for insert data
wait := make(chan string)
v := <-wait
log.Printf("program receive %s, exited.\n", v)
} else {
timeTicker.Stop()
}
}
func staticSpeed() {
connection := getConnection()
defer connection.Close()
if save == 1 {
_, _ = connection.Exec("use " + db)
_, err := connection.Exec("create table if not exists " + saveTable + "(ts timestamp, speed int)")
if err != nil {
log.Fatalf("create %s Table error: %s\n", saveTable, err)
}
}
for {
<-timeTicker.C
currentTime := time.Now()
usedTime := currentTime.UnixNano() - lastStaticTime.UnixNano()
total := getTotalRows(successRows)
currentSuccessRows := total - lastTotalRows
speed := currentSuccessRows * 1e9 / usedTime
log.Printf("insert %d rows, used %d ms, speed %d rows/s", currentSuccessRows, usedTime/1e6, speed)
if save == 1 {
insertSql := fmt.Sprintf("insert into %s values(%d, %d)", saveTable, currentTime.UnixNano()/1e6, speed)
_, _ = connection.Exec(insertSql)
}
lastStaticTime = currentTime
lastTotalRows = total
}
}
func getTotalRows(successRows []int64) int64 {
var total int64 = 0
for j := 0; j < len(successRows); j++ {
total += successRows[j]
}
return total
}
func getSuperTableTimeConfig(fileRows dataRows) (start, cycleTime, avgInterval int64) {
if auto == 1 {
// use auto generate data time
start = startTime
avgInterval = interval
maxTableRows := normalizationDataWithSameInterval(fileRows, avgInterval)
cycleTime = maxTableRows*avgInterval + avgInterval
} else {
// use the sample data primary timestamp
sort.Sort(fileRows) // sort the file data by the primaryKey
minTime := getPrimaryKey(fileRows.rows[0][fileRows.config.Timestamp])
maxTime := getPrimaryKey(fileRows.rows[len(fileRows.rows)-1][fileRows.config.Timestamp])
start = minTime // default startTime use the minTime
// 设置了start时间的话 按照start来
if DefaultStartTime != startTime {
start = startTime
}
tableNum := normalizationData(fileRows, minTime)
if minTime == maxTime {
avgInterval = interval
cycleTime = tableNum*avgInterval + avgInterval
} else {
avgInterval = (maxTime - minTime) / int64(len(fileRows.rows)) * tableNum
cycleTime = maxTime - minTime + avgInterval
}
}
return
}
func createSubTable(subTableMaps map[string]*dataRows) {
connection := getConnection()
defer connection.Close()
_, _ = connection.Exec("use " + db)
createTablePrefix := "create table if not exists "
var buffer bytes.Buffer
for subTableName := range subTableMaps {
superTableName := getSuperTableName(subTableMaps[subTableName].config.StName)
firstRowValues := subTableMaps[subTableName].rows[0] // the first rows values as tags
// create table t using superTable tags(...);
for i := 0; i < hnum; i++ {
tableName := getScaleSubTableName(subTableName, i)
scaleTableMap[tableName] = &scaleTableInfo{
subTableName: subTableName,
insertRows: 0,
}
scaleTableNames = append(scaleTableNames, tableName)
buffer.WriteString(createTablePrefix)
buffer.WriteString(tableName)
buffer.WriteString(" using ")
buffer.WriteString(superTableName)
buffer.WriteString(" tags(")
for _, tag := range subTableMaps[subTableName].config.Tags {
tagValue := fmt.Sprintf("%v", firstRowValues[strings.ToLower(tag.Name)])
buffer.WriteString("'" + tagValue + "'")
buffer.WriteString(",")
}
buffer.Truncate(buffer.Len() - 1)
buffer.WriteString(")")
createTableSql := buffer.String()
buffer.Reset()
//log.Printf("create table: %s\n", createTableSql)
_, err := connection.Exec(createTableSql)
if err != nil {
log.Fatalf("create table error: %s\n", err)
}
}
}
}
func createSuperTable(superTableConfigMap map[string]*superTableConfig) {
connection := getConnection()
defer connection.Close()
if dropdb == 1 {
dropDbSql := "drop database if exists " + db
_, err := connection.Exec(dropDbSql) // drop database if exists
if err != nil {
log.Fatalf("drop database error: %s\n", err)
}
log.Printf("dropdb: %s\n", dropDbSql)
}
createDbSql := "create database if not exists " + db + " " + dbparam
_, err := connection.Exec(createDbSql) // create database if not exists
if err != nil {
log.Fatalf("create database error: %s\n", err)
}
log.Printf("createDb: %s\n", createDbSql)
_, _ = connection.Exec("use " + db)
prefix := "create table if not exists "
var buffer bytes.Buffer
//CREATE TABLE <stable_name> (<field_name> TIMESTAMP, field_name1 field_type,…) TAGS(tag_name tag_type, …)
for key := range superTableConfigMap {
buffer.WriteString(prefix)
buffer.WriteString(getSuperTableName(key))
buffer.WriteString("(")
superTableConf := superTableConfigMap[key]
buffer.WriteString(superTableConf.config.Timestamp)
buffer.WriteString(" timestamp, ")
for _, field := range superTableConf.config.Fields {
buffer.WriteString(field.Name + " " + field.Type + ",")
}
buffer.Truncate(buffer.Len() - 1)
buffer.WriteString(") tags( ")
for _, tag := range superTableConf.config.Tags {
buffer.WriteString(tag.Name + " " + tag.Type + ",")
}
buffer.Truncate(buffer.Len() - 1)
buffer.WriteString(")")
createSql := buffer.String()
buffer.Reset()
//log.Printf("superTable: %s\n", createSql)
_, err = connection.Exec(createSql)
if err != nil {
log.Fatalf("create supertable error: %s\n", err)
}
}
}
func getScaleSubTableName(subTableName string, hNum int) string {
if hNum == 0 {
return subTableName
}
return fmt.Sprintf("%s_%d", subTableName, hNum)
}
func getSuperTableName(stName string) string {
return SuperTablePrefix + stName
}
/**
* normalizationData , and return the num of subTables
*/
func normalizationData(fileRows dataRows, minTime int64) int64 {
var tableNum int64 = 0
for _, row := range fileRows.rows {
// get subTableName
tableValue := getSubTableNameValue(row[fileRows.config.SubTableName])
if len(tableValue) == 0 {
continue
}
row[fileRows.config.Timestamp] = getPrimaryKey(row[fileRows.config.Timestamp]) - minTime
subTableName := getSubTableName(tableValue, fileRows.config.StName)
value, ok := subTableMap[subTableName]
if !ok {
subTableMap[subTableName] = &dataRows{
rows: []map[string]interface{}{row},
config: fileRows.config,
}
tableNum++
} else {
value.rows = append(value.rows, row)
}
}
return tableNum
}
// return the maximum table rows
func normalizationDataWithSameInterval(fileRows dataRows, avgInterval int64) int64 {
// subTableMap
currSubTableMap := make(map[string]*dataRows)
for _, row := range fileRows.rows {
// get subTableName
tableValue := getSubTableNameValue(row[fileRows.config.SubTableName])
if len(tableValue) == 0 {
continue
}
subTableName := getSubTableName(tableValue, fileRows.config.StName)
value, ok := currSubTableMap[subTableName]
if !ok {
row[fileRows.config.Timestamp] = 0
currSubTableMap[subTableName] = &dataRows{
rows: []map[string]interface{}{row},
config: fileRows.config,
}
} else {
row[fileRows.config.Timestamp] = int64(len(value.rows)) * avgInterval
value.rows = append(value.rows, row)
}
}
var maxRows, tableRows = 0, 0
for tableName := range currSubTableMap {
tableRows = len(currSubTableMap[tableName].rows)
subTableMap[tableName] = currSubTableMap[tableName] // add to global subTableMap
if tableRows > maxRows {
maxRows = tableRows
}
}
return int64(maxRows)
}
func getSubTableName(subTableValue string, superTableName string) string {
return SubTablePrefix + subTableValue + "_" + superTableName
}
func insertData(threadIndex, start, end int, wg *sync.WaitGroup, successRows []int64) {
connection := getConnection()
defer connection.Close()
defer wg.Done()
_, _ = connection.Exec("use " + db) // use db
log.Printf("thread-%d start insert into [%d, %d) subtables.\n", threadIndex, start, end)
num := 0
subTables := scaleTableNames[start:end]
var buffer bytes.Buffer
for {
var currSuccessRows int64
var appendRows int
var lastTableName string
buffer.WriteString(InsertPrefix)
for _, tableName := range subTables {
subTableInfo := subTableMap[scaleTableMap[tableName].subTableName]
subTableRows := int64(len(subTableInfo.rows))
superTableConf := superTableConfigMap[subTableInfo.config.StName]
tableStartTime := superTableConf.startTime
var tableEndTime int64
if vnum == 0 {
// need continue generate data
tableEndTime = time.Now().UnixNano() / 1e6
} else {
tableEndTime = tableStartTime + superTableConf.cycleTime*int64(vnum) - superTableConf.avgInterval
}
insertRows := scaleTableMap[tableName].insertRows
for {
loopNum := insertRows / subTableRows
rowIndex := insertRows % subTableRows
currentRow := subTableInfo.rows[rowIndex]
currentTime := getPrimaryKey(currentRow[subTableInfo.config.Timestamp]) + loopNum*superTableConf.cycleTime + tableStartTime
if currentTime <= tableEndTime {
// append
if lastTableName != tableName {
buffer.WriteString(tableName)
buffer.WriteString(" values")
}
lastTableName = tableName
buffer.WriteString("(")
buffer.WriteString(fmt.Sprintf("%v", currentTime))
buffer.WriteString(",")
for _, field := range subTableInfo.config.Fields {
buffer.WriteString(getFieldValue(currentRow[strings.ToLower(field.Name)]))
buffer.WriteString(",")
}
buffer.Truncate(buffer.Len() - 1)
buffer.WriteString(") ")
appendRows++
insertRows++
if appendRows == batch {
// executeBatch
insertSql := buffer.String()
affectedRows := executeBatchInsert(insertSql, connection)
successRows[threadIndex] += affectedRows
currSuccessRows += affectedRows
buffer.Reset()
buffer.WriteString(InsertPrefix)
lastTableName = ""
appendRows = 0
}
} else {
// finished insert current table
break
}
}
scaleTableMap[tableName].insertRows = insertRows
}
// left := len(rows)
if appendRows > 0 {
// executeBatch
insertSql := buffer.String()
affectedRows := executeBatchInsert(insertSql, connection)
successRows[threadIndex] += affectedRows
currSuccessRows += affectedRows
buffer.Reset()
}
// log.Printf("thread-%d finished insert %d rows, used %d ms.", threadIndex, currSuccessRows, time.Since(threadStartTime)/1e6)
if vnum != 0 {
// thread finished insert data
// log.Printf("thread-%d exit\n", threadIndex)
break
}
if num == 0 {
wg.Done() //finished insert history data
num++
}
if currSuccessRows == 0 {
// log.Printf("thread-%d start to sleep %d ms.", threadIndex, delay)
time.Sleep(time.Duration(delay) * time.Millisecond)
}
// need continue insert data
}
}
func executeBatchInsert(insertSql string, connection *sql.DB) int64 {
result, err := connection.Exec(insertSql)
if err != nil {
log.Printf("execute insertSql %s error, %s\n", insertSql, err)
return 0
}
affected, _ := result.RowsAffected()
if affected < 0 {
affected = 0
}
return affected
}
func getFieldValue(fieldValue interface{}) string {
return fmt.Sprintf("'%v'", fieldValue)
}
func getConnection() *sql.DB {
db, err := sql.Open(DriverName, dataSourceName)
if err != nil {
panic(err)
}
return db
}
func getSubTableNameValue(suffix interface{}) string {
return fmt.Sprintf("%v", suffix)
}
func readFile(config dataImport.CaseConfig) dataRows {
fileFormat := strings.ToLower(config.Format)
if fileFormat == JsonFormat {
return readJSONFile(config)
} else if fileFormat == CsvFormat {
return readCSVFile(config)
}
log.Printf("the file %s is not supported yet\n", config.FilePath)
return dataRows{}
}
func readCSVFile(config dataImport.CaseConfig) dataRows {
var rows dataRows
f, err := os.Open(config.FilePath)
if err != nil {
log.Printf("Error: %s, %s\n", config.FilePath, err)
return rows
}
defer f.Close()
r := bufio.NewReader(f)
//read the first line as title
lineBytes, _, err := r.ReadLine()
if err == io.EOF {
log.Printf("the file %s is empty\n", config.FilePath)
return rows
}
line := strings.ToLower(string(lineBytes))
titles := strings.Split(line, config.Separator)
if len(titles) < 3 {
// need suffix、 primaryKey and at least one other field
log.Printf("the first line of file %s should be title row, and at least 3 field.\n", config.FilePath)
return rows
}
rows.config = config
var lineNum = 0
for {
// read data row
lineBytes, _, err = r.ReadLine()
lineNum++
if err == io.EOF {
break
}
// fmt.Println(line)
rowData := strings.Split(string(lineBytes), config.Separator)
dataMap := make(map[string]interface{})
for i, title := range titles {
title = strings.TrimSpace(title)
if i < len(rowData) {
dataMap[title] = strings.TrimSpace(rowData[i])
} else {
dataMap[title] = ""
}
}
// if the suffix valid
if !existMapKeyAndNotEmpty(config.Timestamp, dataMap) {
log.Printf("the Timestamp[%s] of line %d is empty, will filtered.\n", config.Timestamp, lineNum)
continue
}
// if the primary key valid
primaryKeyValue := getPrimaryKeyMilliSec(config.Timestamp, config.TimestampType, config.TimestampTypeFormat, dataMap)
if primaryKeyValue == -1 {
log.Printf("the Timestamp[%s] of line %d is not valid, will filtered.\n", config.Timestamp, lineNum)
continue
}
dataMap[config.Timestamp] = primaryKeyValue
rows.rows = append(rows.rows, dataMap)
}
return rows
}
func readJSONFile(config dataImport.CaseConfig) dataRows {
var rows dataRows
f, err := os.Open(config.FilePath)
if err != nil {
log.Printf("Error: %s, %s\n", config.FilePath, err)
return rows
}
defer f.Close()
r := bufio.NewReader(f)
//log.Printf("file size %d\n", r.Size())
rows.config = config
var lineNum = 0
for {
lineBytes, _, err := r.ReadLine()
lineNum++
if err == io.EOF {
break
}
line := make(map[string]interface{})
err = json.Unmarshal(lineBytes, &line)
if err != nil {
log.Printf("line [%d] of file %s parse error, reason: %s\n", lineNum, config.FilePath, err)
continue
}
// transfer the key to lowercase
lowerMapKey(line)
if !existMapKeyAndNotEmpty(config.SubTableName, line) {
log.Printf("the SubTableName[%s] of line %d is empty, will filtered.\n", config.SubTableName, lineNum)
continue
}
primaryKeyValue := getPrimaryKeyMilliSec(config.Timestamp, config.TimestampType, config.TimestampTypeFormat, line)
if primaryKeyValue == -1 {
log.Printf("the Timestamp[%s] of line %d is not valid, will filtered.\n", config.Timestamp, lineNum)
continue
}
line[config.Timestamp] = primaryKeyValue
rows.rows = append(rows.rows, line)
}
return rows
}
/**
* get primary key as millisecond , otherwise return -1
*/
func getPrimaryKeyMilliSec(key string, valueType string, valueFormat string, line map[string]interface{}) int64 {
if !existMapKeyAndNotEmpty(key, line) {
return -1
}
if DATETIME == valueType {
// transfer the datetime to milliseconds
return parseMillisecond(line[key], valueFormat)
}
value, err := strconv.ParseInt(fmt.Sprintf("%v", line[key]), 10, 64)
// as millisecond num
if err != nil {
return -1
}
return value
}
// parseMillisecond parse the dateStr to millisecond, return -1 if failed
func parseMillisecond(str interface{}, layout string) int64 {
value, ok := str.(string)
if !ok {
return -1
}
t, err := time.ParseInLocation(layout, strings.TrimSpace(value), time.Local)
if err != nil {
log.Println(err)
return -1
}
return t.UnixNano() / 1e6
}
// lowerMapKey transfer all the map key to lowercase
func lowerMapKey(maps map[string]interface{}) {
for key := range maps {
value := maps[key]
delete(maps, key)
maps[strings.ToLower(key)] = value
}
}
func existMapKeyAndNotEmpty(key string, maps map[string]interface{}) bool {
value, ok := maps[key]
if !ok {
return false
}
str, err := value.(string)
if err && len(str) == 0 {
return false
}
return true
}
func checkUserCaseConfig(caseName string, caseConfig *dataImport.CaseConfig) {
if len(caseConfig.StName) == 0 {
log.Fatalf("the stname of case %s can't be empty\n", caseName)
}
caseConfig.StName = strings.ToLower(caseConfig.StName)
if len(caseConfig.Tags) == 0 {
log.Fatalf("the tags of case %s can't be empty\n", caseName)
}
if len(caseConfig.Fields) == 0 {
log.Fatalf("the fields of case %s can't be empty\n", caseName)
}
if len(caseConfig.SubTableName) == 0 {
log.Fatalf("the suffix of case %s can't be empty\n", caseName)
}
caseConfig.SubTableName = strings.ToLower(caseConfig.SubTableName)
caseConfig.Timestamp = strings.ToLower(caseConfig.Timestamp)
var timestampExist = false
for i, field := range caseConfig.Fields {
if strings.EqualFold(field.Name, caseConfig.Timestamp) {
if strings.ToLower(field.Type) != TIMESTAMP {
log.Fatalf("case %s's primaryKey %s field type is %s, it must be timestamp\n", caseName, caseConfig.Timestamp, field.Type)
}
timestampExist = true
if i < len(caseConfig.Fields)-1 {
// delete middle item, a = a[:i+copy(a[i:], a[i+1:])]
caseConfig.Fields = caseConfig.Fields[:i+copy(caseConfig.Fields[i:], caseConfig.Fields[i+1:])]
} else {
// delete the last item
caseConfig.Fields = caseConfig.Fields[:len(caseConfig.Fields)-1]
}
break
}
}
if !timestampExist {
log.Fatalf("case %s primaryKey %s is not exist in fields\n", caseName, caseConfig.Timestamp)
}
caseConfig.TimestampType = strings.ToLower(caseConfig.TimestampType)
if caseConfig.TimestampType != MILLISECOND && caseConfig.TimestampType != DATETIME {
log.Fatalf("case %s's timestampType %s error, only can be timestamp or datetime\n", caseName, caseConfig.TimestampType)
}
if caseConfig.TimestampType == DATETIME && len(caseConfig.TimestampTypeFormat) == 0 {
log.Fatalf("case %s's timestampTypeFormat %s can't be empty when timestampType is datetime\n", caseName, caseConfig.TimestampTypeFormat)
}
}
func parseArg() {
flag.StringVar(&cfg, "cfg", "config/cfg.toml", "configuration file which describes useCase and data format.")
flag.StringVar(&cases, "cases", "sensor_info", "useCase for dataset to be imported. Multiple choices can be separated by comma, for example, -cases sensor_info,camera_detection.")
flag.IntVar(&hnum, "hnum", 100, "magnification factor of the sample tables. For example, if hnum is 100 and in the sample data there are 10 tables, then 10x100=1000 tables will be created in the database.")
flag.IntVar(&vnum, "vnum", 1000, "copies of the sample records in each table. If set to 0this program will never stop simulating and importing data even if the timestamp has passed current time.")
flag.Int64Var(&delay, "delay", DefaultDelay, "the delay time interval(millisecond) to continue generating data when vnum set 0.")
flag.Int64Var(&tick, "tick", 2000, "the tick time interval(millisecond) to print statistic info.")
flag.IntVar(&save, "save", 0, "whether to save the statistical info into 'statistic' table. 0 is disabled and 1 is enabled.")
flag.StringVar(&saveTable, "savetb", DefaultStatisticTable, "the table to save 'statistic' info when save set 1.")
flag.IntVar(&thread, "thread", 10, "number of threads to import data.")
flag.IntVar(&batch, "batch", 100, "rows of records in one import batch.")
flag.IntVar(&auto, "auto", 0, "whether to use the startTime and interval specified by users when simulating the data. 0 is disabled and 1 is enabled.")
flag.StringVar(&startTimeStr, "start", "", "the starting timestamp of simulated data, in the format of yyyy-MM-dd HH:mm:ss.SSS. If not specified, the earliest timestamp in the sample data will be set as the startTime.")
flag.Int64Var(&interval, "interval", DefaultInterval, "time interval between two consecutive records, in the unit of millisecond. Only valid when auto is 1.")
flag.StringVar(&host, "host", "127.0.0.1", "tdengine server ip.")
flag.IntVar(&port, "port", 6030, "tdengine server port.")
flag.StringVar(&user, "user", "root", "user name to login into the database.")
flag.StringVar(&password, "password", "taosdata", "the import tdengine user password")
flag.IntVar(&dropdb, "dropdb", 0, "whether to drop the existing database. 1 is yes and 0 otherwise.")
flag.StringVar(&db, "db", "", "name of the database to store data.")
flag.StringVar(&dbparam, "dbparam", "", "database configurations when it is created.")
flag.Parse()
}
func printArg() {
fmt.Println("used param: ")
fmt.Println("-cfg: ", cfg)
fmt.Println("-cases:", cases)
fmt.Println("-hnum:", hnum)
fmt.Println("-vnum:", vnum)
fmt.Println("-delay:", delay)
fmt.Println("-tick:", tick)
fmt.Println("-save:", save)
fmt.Println("-savetb:", saveTable)
fmt.Println("-thread:", thread)
fmt.Println("-batch:", batch)
fmt.Println("-auto:", auto)
fmt.Println("-start:", startTimeStr)
fmt.Println("-interval:", interval)
fmt.Println("-host:", host)
fmt.Println("-port", port)
fmt.Println("-user", user)
fmt.Println("-password", password)
fmt.Println("-dropdb", dropdb)
fmt.Println("-db", db)
fmt.Println("-dbparam", dbparam)
}