CSV input data format
Use the `csv` input data format to parse comma-separated values into Telegraf metrics.
Configuration
[[inputs.file]]
files = ["example"]
## The data format to consume.
## Type: string
## Each data format has its own unique set of configuration options.
## For more information about input data formats and options,
## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "csv"
## Specifies the number of rows to treat as the header.
## Type: integer
## Default: 0
## The value can be 0 or greater.
## If `0`, doesn't use a header; the parser treats all rows as data and uses the names specified in `csv_column_names`.
## If `1`, uses the first row as the header.
## If greater than `1`, concatenates that number of values for each column.
## Values specified in `csv_column_names` override column names in the header.
csv_header_row_count = 0
## Specifies custom names for columns.
## Type: []string
## Default: []
## Specify names in order by column; unnamed columns are ignored by the parser.
## Required if `csv_header_row_count` is set to `0`.
csv_column_names = []
## Specifies data types for columns.
## Type: []string{"int", "float", "bool", "string"}
## Default: Tries to convert each column to one of the possible types, in the following order: "int", "float", "bool", "string".
## Possible values: "int", "float", "bool", "string".
## Specify types in order by column (for example, `["string", "int", "float"]`).
csv_column_types = []
## Specifies the number of rows to skip before looking for metadata and header information.
## Type: integer
## Default: 0
csv_skip_rows = 0
## Specifies the number of rows to parse as metadata (before looking for header information).
## Type: integer
## Default: 0; no metadata rows to parse.
## If set, parses the rows using the characters specified in `csv_metadata_separators`, and then adds the
## parsed key-value pairs as tags in the data.
## To convert the tags to fields, use the converter processor.
csv_metadata_rows = 0
## Specifies metadata separators, in order of precedence, for parsing metadata rows.
## Type: []string
## At least one separator is required if `csv_metadata_rows` is set.
## The specified values set the order of precedence for separators used to parse `csv_metadata_rows` into key-value pairs.
## Separators are case-sensitive.
csv_metadata_separators = [":", "="]
## Specifies a set of characters to trim from metadata rows.
## Type: string
## Default: empty; the parser doesn't trim metadata rows.
## Trim characters are case sensitive.
csv_metadata_trim_set = ""
## Specifies the number of columns to skip in header and data rows.
## Type: integer
## Default: 0; no columns are skipped
csv_skip_columns = 0
## Specifies the separator for columns in the CSV.
## Type: string
## Default: a comma (`,`)
## If you specify an invalid delimiter (for example, `"\u0000"`),
## the parser converts commas to `"\ufffd"` and converts invalid delimiters
## to commas, parses the data, and then reverts invalid characters and commas
## to their original values.
csv_delimiter = ","
## Specifies the character used to indicate a comment row.
## Type: string
## Default: empty; no rows are treated as comments
## The parser skips rows that begin with the specified character.
csv_comment = ""
## Specifies whether to remove leading whitespace from fields.
## Type: boolean
## Default: false
csv_trim_space = false
## Specifies columns (by name) to use as tags.
## Type: []string
## Default: empty
## Columns not specified as tags or measurement name are considered fields.
csv_tag_columns = []
## Specifies whether column tags overwrite metadata and default tags.
## Type: boolean
## Default: false
## If true, the column tag value takes precedence over metadata
## or default tags that have the same name.
csv_tag_overwrite = false
## Specifies the CSV column to use for the measurement name.
## Type: string
## Default: empty; uses the input plugin name for the measurement name.
## If set, the measurement name is extracted from values in the specified
## column and the column isn't included as a field.
csv_measurement_column = ""
## Specifies the CSV column to use for the timestamp.
## Type: string
## Default: empty; uses the current system time as the timestamp in metrics
## If set, the parser extracts time values from the specified column
## to use as timestamps in metrics, and the column isn't included
## as a field in metrics.
## If set, you must also specify a value for `csv_timestamp_format`.
## For more information, see [timestamps](/telegraf/v1/data_formats/input/csv/#timestamps).
csv_timestamp_column = ""
## Specifies the timestamp format for values extracted from `csv_timestamp_column`.
## Type: string
## Possible values: "unix", "unix_ms", "unix_us", "unix_ns", the Go reference time in one of the predefined layouts
## Default: empty
## Required if `csv_timestamp_column` is specified.
## For more information, see [timestamps](/telegraf/v1/data_formats/input/csv/#timestamps).
csv_timestamp_format = ""
## Specifies the time zone to use and outputs location-specific timestamps in metrics.
## Only used if `csv_timestamp_format` is the Go reference time in one of the
## predefined layouts; unix formats are in UTC.
## Type: string
## Default: empty
## Possible values: a time zone name in TZ syntax. For a list of names, see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones#List.
csv_timezone = ""
## For more information, see [timestamps](/telegraf/v1/data_formats/input/csv/#timestamps).
## Specifies values to skip--for example, an empty string (`""`).
## Type: []string
## Default: empty
## The parser skips field values that match any of the specified values.
csv_skip_values = []
## Specifies whether to skip CSV lines that can't be parsed.
## Type: boolean
## Default: false
csv_skip_errors = false
## Specifies whether to reset the parser after each call.
## Type: string
## Default: "none"
## Possible values:
## - "none": Do not reset the parser.
## - "always": Reset the parser's state after reading each file in the gather
## cycle. If parsing by line, the setting is ignored.
## Resetting the parser state after parsing each file is helpful when reading
## full CSV structures that include headers or metadata.
csv_reset_mode = "none"
Metrics
With the default configuration, the CSV data format parser creates one metric for each CSV row and adds the CSV columns as fields in the metric. The data type of each field is determined automatically from its value, unless explicitly defined with `csv_column_types`.
Data format configuration options let you customize how the parser handles specific CSV rows, columns, and data types.
Metric filtering and aggregator and processor plugins provide additional data transformation options. For example:
- Use metric filtering to skip columns and rows.
- Use the converter processor to convert parsed metadata from tags to fields (see the sketch after this list).
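A minimal converter sketch, assuming metadata was parsed into a tag named `Version` (the tag name is illustrative):
[[processors.converter]]
[processors.converter.tags]
## Convert the "Version" tag into a string field.
string = ["Version"]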
Timestamps
Each metric has a timestamp: the date and time associated with its fields. The default timestamp for a created metric is the current time in UTC.
To use values extracted from the CSV as metric timestamps, specify the `csv_timestamp_column` and `csv_timestamp_format` options.
csv_timestamp_column
The `csv_timestamp_column` option specifies the key (column name) in the CSV data that contains the time values to extract and use as timestamps in metrics.
Unix time values can be one of the following data types:
- int64
- float64
- string
If you specify a Go format for `csv_timestamp_format`, the values in the timestamp column must be strings.
当使用 "unix"
格式 时,允许使用可选的小数部分。其他 Unix 时间格式,例如 "unix_ms"
,不能有小数部分。
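For example, a sketch of the `"unix"` format with a fractional seconds value (the column names are assumptions):
csv_header_row_count = 1
csv_timestamp_column = "time"
## "unix" accepts an optional fractional part, for example 1536843808.123.
## "unix_ms" would instead require an integer such as 1536843808123.
csv_timestamp_format = "unix"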
csv_timestamp_format
If you specify `csv_timestamp_column`, you must also specify the format of the timestamps in the column. To specify the format, set `csv_timestamp_format` to one of the following values:
"unix"
"unix_ms"
"unix_us"
"unix_ns"
- 使用 Go 参考时间的 Go
time
常量 中的预定义布局——例如,"Mon Jan 2 15:04:05 MST 2006"
(UnixDate
格式字符串)。
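For example, a sketch using a custom Go reference-time layout, assuming a `time` column with values like `2018-09-13 13:03:28`:
csv_timestamp_column = "time"
## The Go reference time (Jan 2, 2006 15:04:05) written in the
## same layout as the data.
csv_timestamp_format = "2006-01-02 15:04:05"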
For more information about time formats, see the following:
- Unix time documentation
- Go time package documentation
Time zones
Telegraf outputs timestamps in UTC.
To parse location-aware timestamps in your data, specify a `csv_timestamp_format` that includes time zone information.
If a timestamp in `csv_timestamp_column` contains a time zone offset, the parser uses the offset to calculate the timestamp in UTC.
If `csv_timestamp_format` and your timestamp data contain a time zone abbreviation, the parser tries to resolve the abbreviation to a location in the IANA Time Zone Database and returns the UTC offset for that location. To set the location the parser should use when resolving time zone abbreviations, specify a value for `csv_timezone` using TZ syntax from the Internet Assigned Numbers Authority Time Zone Database.
Before Telegraf v1.27, Telegraf parsers ignored abbreviated time zones (for example, "EST") in parsed time values and used UTC as the timestamp location.
Examples
Extract timestamps in RFC3339 format from a time column
Configuration
[agent]
omit_hostname = true
[[inputs.file]]
files = ["example"]
data_format = "csv"
csv_header_row_count = 1
csv_measurement_column = "measurement"
csv_timestamp_column = "time"
csv_timestamp_format = "2006-01-02T15:04:05Z07:00"
[[outputs.file]]
files = ["metrics.out"]
influx_sort_fields = true
Input
measurement,cpu,time_user,time_system,time_idle,time
cpu,cpu0,42,42,42,2018-09-13T13:03:28Z
Output
cpu cpu="cpu0",time_idle=42i,time_system=42i,time_user=42i 1536843808000000000
Parse time zone abbreviations
The following example specifies `csv_timezone` so the parser can resolve the time zone abbreviation (`EST`) associated with the input data.
Configuration
[agent]
omit_hostname = true
[[inputs.file]]
files = ["example"]
data_format = "csv"
csv_header_row_count = 1
csv_measurement_column = "measurement"
csv_timestamp_column = "time"
csv_timestamp_format = "Mon, 02 Jan 2006 15:04:05 MST"
csv_timezone = "America/New_York"
[[outputs.file]]
files = ["metrics.out"]
influx_sort_fields = true
Input
measurement,cpu,time_user,time_system,time_idle,time
cpu,cpu1,42,42,42,"Mon, 02 Jan 2006 15:04:05 EST"
cpu,cpu1,42,42,42,"Mon, 02 Jan 2006 15:04:05 GMT"
The parser resolves the `GMT` and `EST` abbreviations and outputs the following:
cpu cpu="cpu1",time_idle=42i,time_system=42i,time_user=42i 1136232245000000000
cpu cpu="cpu1",time_idle=42i,time_system=42i,time_user=42i 1136214245000000000
The timestamps represent the following dates in UTC, respectively:
2006-01-02 20:04:05
2006-01-02 15:04:05
Parse metadata as tags
Configuration
[agent]
omit_hostname = true
[[inputs.file]]
files = ["example"]
data_format = "csv"
csv_measurement_column = "measurement"
csv_metadata_rows = 2
csv_metadata_separators = [":", "="]
csv_metadata_trim_set = "# "
csv_header_row_count = 1
csv_tag_columns = ["Version","cpu"]
csv_timestamp_column = "time"
csv_timestamp_format = "2006-01-02T15:04:05Z07:00"
[[outputs.file]]
files = ["metrics.out"]
influx_sort_fields = true
Input
# Version=1.1
# File Created: 2021-11-17T07:02:45+10:00
Version,measurement,cpu,time_user,time_system,time_idle,time
1.2,cpu,cpu0,42,42,42,2018-09-13T13:03:28Z
Output
cpu,File\ Created=2021-11-17T07:02:45+10:00,Version=1.1,cpu=cpu0 time_idle=42i,time_system=42i,time_user=42i 1536843808000000000
Allow tag column values to overwrite parsed metadata
Configuration
[agent]
omit_hostname = true
[[inputs.file]]
files = ["example"]
data_format = "csv"
csv_measurement_column = "measurement"
csv_metadata_rows = 2
csv_metadata_separators = [":", "="]
csv_metadata_trim_set = " #"
csv_header_row_count = 1
csv_tag_columns = ["Version","cpu"]
csv_tag_overwrite = true
csv_timestamp_column = "time"
csv_timestamp_format = "2006-01-02T15:04:05Z07:00"
[[outputs.file]]
files = ["metrics.out"]
influx_sort_fields = true
Input
# Version=1.1
# File Created: 2021-11-17T07:02:45+10:00
Version,measurement,cpu,time_user,time_system,time_idle,time
1.2,cpu,cpu0,42,42,42,2018-09-13T13:03:28Z
Output
cpu,File\ Created=2021-11-17T07:02:45+10:00,Version=1.2,cpu=cpu0 time_idle=42i,time_system=42i,time_user=42i 1536843808000000000
Merge multiple header rows
Configuration
[agent]
omit_hostname = true
[[inputs.file]]
files = ["example"]
data_format = "csv"
csv_comment = "#"
csv_header_row_count = 2
csv_measurement_column = "measurement"
csv_timestamp_column = "time"
csv_timestamp_format = "2006-01-02T15:04:05Z07:00"
[[outputs.file]]
## Files to write to.
files = ["metrics.out"]
## Use determinate ordering.
influx_sort_fields = true
Input
# Version=1.1
# File Created: 2021-11-17T07:02:45+10:00
Version,measurement,cpu,time,time,time,time
_system,,,_user,_system,_idle,
1.2,cpu,cpu0,42,42,42,2018-09-13T13:03:28Z
Output
cpu Version_system=1.2,cpu="cpu0",time_idle=42i,time_system=42i,time_user=42i 1536843808000000000
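Because `csv_header_row_count = 2`, the parser concatenates the two header rows column by column, producing names such as `Version_system` (from `Version` + `_system`) and `time_user` (from `time` + `_user`).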