Skip to main content
Version: Tenzir v4.1

Configuration

Tenzir reads a configuration file at startup. Here is an example configuration that you can adapt to your needs.

# This is an example configuration file for Tenzir that shows all available
# options. Options in angle brackets have their default value determined at
# runtime.

# Options that concern Tenzir.
tenzir:
# The host and port to listen at and connect to.
endpoint: "localhost:5158"

# The timeout for connecting to a Tenzir server. Set to 0 seconds to wait
# indefinitely.
connection-timeout: 5m

# The delay between two connection attempts. Set to 0s to try connecting
# without retries.
connection-retry-delay: 3s

# The file system path used for persistent state.
db-directory: "tenzir.db"

# The file system path used for cache files.
# Defaults to one of the following paths, selecting the first that is
# available:
# - $XDG_CACHE_HOME
# - $XDG_HOME_DIR/.cache/tenzir (linux) or $XDG_HOME_DIR/Libraries/caches/tenzir (mac)
# - $HOME/.cache/tenzir (linux) or $HOME/Libraries/caches/tenzir (mac)
# - $TEMPORARY_DIRECTORY/tenzir/cache
# To determine $TEMPORARY_DIRECTORY, the values of TMPDIR, TMP, TEMP, TEMPDIR are
# checked in that order, and as a last resort "/tmp" is used.
#cache-directory:

# The file system path used for log files.
log-file: "<db-directory>/server.log"

# The file system path used for client log files relative to the current
# working directory of the client. Note that this is disabled by default.
# If not specified no log files are written for clients at all.
client-log-file: "client.log"

# Format for printing individual log entries to the log-file.
# For a list of valid format specifiers, see spdlog format specification
# at https://github.com/gabime/spdlog/wiki/3.-Custom-formatting.
file-format: "[%Y-%m-%dT%T.%e%z] [%n] [%l] [%s:%#] %v"

# Configures the minimum severity of messages written to the log file.
# Possible values: quiet, error, warning, info, verbose, debug, trace.
# File logging is only available for commands that start a node (e.g.,
# tenzir-node). The levels above 'verbose' are usually not available in
# release builds.
file-verbosity: debug

# Whether to disable automatic log rotation. If set to false, a new log file
# will be created when the size of the current log file exceeds
# log-rotation-threshold (10 MiB in this example).
disable-log-rotation: false

# The size limit when a log file should be rotated.
log-rotation-threshold: 10MiB

# Maximum number of log messages in the logger queue.
log-queue-size: 1000000

# The sink type to use for console logging. Possible values: stderr,
# syslog, journald. Note that 'journald' can only be selected on linux
# systems, and only if Tenzir was built with journald support.
# The journald sink is used as default if Tenzir is started as a systemd
# service and the service is configured to use the journal for stderr,
# otherwise the default is the unstructured stderr sink.
#console-sink: stderr/journald

# Mode for console log output generation. Automatic renders color only when
# writing to a tty.
# Possible values: always, automatic, never. (default automatic)
console: automatic

# Format for printing individual log entries to the console. For a list
# of valid format specifiers, see spdlog format specification at
# https://github.com/gabime/spdlog/wiki/3.-Custom-formatting.
console-format: "%^[%T.%e] %v%$"

# Configures the minimum severity of messages written to the console.
# For a list of valid log levels, see file-verbosity.
console-verbosity: info

# List of directories to look for schema files in ascending order of
# priority.
schema-dirs: []

# Additional directories to load plugins specified using `tenzir.plugins`
# from.
plugin-dirs: []

# The plugins to load at startup. For relative paths, Tenzir tries to find
# the files in the specified `tenzir.plugin-dirs`. The special values
# 'bundled' and 'all' enable autoloading of bundled and all plugins
# respectively. Note: Add `example` or `/path/to/libtenzir-plugin-example.so`
# to load the example plugin.
plugins: []

# Names of plugins and builtins to explicitly forbid from being used in
# Tenzir. For example, adding `shell` will prohibit use of the `shell`
# operator builtin, and adding `kafka` will prohibit use of the `kafka`
# connector plugin. This allows more fine-grained control than the
# `tenzir.allow-unsafe-pipelines` option.
disable-plugins: []

# The unique ID of this node.
node-id: "node"

# Spawn a node instead of connecting to one.
node: false

# Allow unsafe location overrides for pipelines with the 'local' and 'remote'
# keywords, e.g., remotely reading from a file.
allow-unsafe-pipelines: false

# The size of an index shard, expressed in number of events. This should
# be a power of 2.
max-partition-size: 4194304

# Timeout after which an active partition is forcibly flushed, regardless of
# its size.
active-partition-timeout: 30 seconds

# Automatically rebuild undersized and outdated partitions in the background.
# The given number controls how much resources to spend on it. Set to 0 to
# disable.
automatic-rebuild: 1

# Timeout after which an automatic rebuild is triggered.
rebuild-interval: 2 hours

# The number of index shards that can be cached in memory.
max-resident-partitions: 1

# The number of index shards that are considered for the first evaluation
# round of a query.
max-taste-partitions: 5

# The number of queries that can be executed in parallel.
max-queries: 10

# The directory to use for the partition synopses of the catalog.
#catalog-dir: <db-directory>/index

# Whether to run some components in dedicated threads, in particular
# the filesystem and catalog actors. It is usually a good idea to
# enable this setting for significantly better performance.
detach-components: true

# The store backend to use. Can be 'feather', or the name of a user-provided
# store plugin.
store-backend: feather

# Zstd compression level applied to both Feather and Parquet store backends.
# zstd-compression-level: <default>

# Interval between two aging cycles.
aging-frequency: 24h

# Query for aging out obsolete data.
aging-query:

# Keep track of performance metrics.
enable-metrics: false

# The configuration of the metrics reporting component.
metrics:
# Configures if and how metrics should be ingested back into Tenzir.
self-sink:
enable: true
slice-size: 128
# Configures if and where metrics should be written to a file.
file-sink:
enable: false
real-time: false
path: /tmp/tenzir-metrics.log
# Configures if and where metrics should be written to a socket.
uds-sink:
enable: false
real-time: false
path: /tmp/tenzir-metrics.sock
type: datagram

# The `index` key is used to adjust the false-positive rate of
# the first-level lookup data structures (called synopses) in the
# catalog. The lower the false-positive rate the more space will be
# required, so this setting can be used to manually tune the trade-off
# of performance vs. space.
index:
# The default false-positive rate for type synopses.
default-fp-rate: 0.01
# rules:
# Every rule adjusts the behaviour of Tenzir for a set of targets.
# Tenzir creates one synopsis per target. Targets can be either types
# or field names.
#
# fp-rate - false positive rate. Has effect on string and address type
# targets
#
# partition-index - Tenzir will not create dense index when set to false
# - targets: [:ip]
# fp-rate: 0.01

# The `tenzir-ctl start` command starts a new Tenzir server process.
start:

# Prints the endpoint for clients when the server is ready to accept
# connections. This comes in handy when letting the OS choose an
# available random port, i.e., when specifying 0 as port value.
print-endpoint: false

# An ordered list of commands to run inside the node after starting.
# As an example, to configure an auto-starting PCAP source that listens
# on the interface 'en0' and lives inside the Tenzir node, add `spawn
# source pcap -i en0`.
# Note that commands are not executed sequentially but in parallel.
commands: []

# Triggers removal of old data when the disk budget is exceeded.
disk-budget-high: 0GiB

# When the budget was exceeded, data is erased until the disk space is
# below this value.
disk-budget-low: 0GiB

# Seconds between successive disk space checks.
disk-budget-check-interval: 90

# When erasing, how many partitions to erase in one go before rechecking
# the size of the database directory.
disk-budget-step-size: 1

# Binary to use for checking the size of the database directory. If left
# unset, Tenzir will recursively add up the size of all files in the
# database directory to compute the size. Mainly useful for, e.g.,
# compressed filesystems where raw file size is not the correct metric.
# Must be the absolute path to an executable file, which will get passed
# the database directory as its first and only argument.
#disk-budget-check-binary: /opt/tenzir/libexec/tenzir-df-percent.sh


# The `tenzir-ctl count` command counts hits for a query without exporting
# data.
count:

# Estimate an upper bound by skipping candidate checks.
estimate: false

# The `tenzir-ctl dump` command prints configuration objects as JSON.
dump:
# Format output as YAML.
yaml: false

# The `tenzir-ctl export` command exports query results to stdout or a file.
export:
# Mark a query as continuous.
continuous: false

# Mark a query as unified.
unified: false

# Mark a query as low priority.
low-priority: false

# Don't substitute taxonomy identifiers.
disable-taxonomies: false

# Timeout to stop the export after.
#timeout: <infinite>

# The maximum number of events to export.
#max-events: <infinity>

# Path for reading the query or "-" for reading from stdin. Note: Setting
# this option in the config file creates a conflict with `tenzir-ctl export`
# with a positional query argument. This option is only listed here for
# completeness.
#read: '-'

# Path to write events to or "-" for writing to stdout.
write: '-'

# Treat the write option as a UNIX domain socket to connect to.
uds: false

# The `tenzir-ctl export json` command exports events formatted as JSONL
# (line-delimited JSON).
json:

# Flatten nested objects into the top-level object.
flatten: false

# Render durations as numbers as opposed to human-readable strings.
numeric-durations: false

# Omit null fields in JSON objects.
omit-nulls: false

# Omit empty records in JSON objects.
omit-empty-records: false

# Omit empty lists in JSON objects.
omit-empty-lists: false

# Omit empty maps in JSON objects.
omit-empty-maps: false

# Set a default value for the omit-nulls and omit-empty-* options.
omit-empty: false

# The `tenzir-ctl export pcap` command exports events in the PCAP format.
pcap:
# Flush to disk after this many packets.
flush-interval: 10000

# The `tenzir-ctl import` command imports data from stdin, files or over the
# network.
import:

# The maximum number of events to import.
#max-events: <infinity>

# Timeout after which buffered table slices are forwarded to the node.
batch-timeout: 1s

# Upper bound for the size of a table slice. A value of 0 causes the
# batch-size to be unbounded, leaving control of batching to the
# tenzir.import.read-timeout option only. This should be a power of 2.
batch-size: 65536

# Block until the importer forwarded all data.
blocking: false

# The amount of time that each read iteration waits for new input.
read-timeout: 20ms

# The endpoint to listen on ("[host]:port/type").
#listen: <none>

# Path to file to read events from or "-" for stdin.
read: '-'

# Treat the read option as a UNIX domain socket to connect to.
uds: false

# Path to an alternate schema.
#schema-file: <none>

# An alternate schema as a string.
#schema: <none>

# The `tenzir-ctl import csv` command imports comma-separated values.
csv:
# The single-character separator. Set this to ' ' to parse space-separated
# values, or '\t' to parse tab-separated values.
separator: ','

# The `tenzir-ctl import json` command imports JSONL data.
json:
# Read the event type from the given field (specify as
# '<field>[:<prefix>]').
#selector: <none>

# The `tenzir-ctl import pcap` command imports PCAP logs.
pcap:
# Network interface to read packets from.
#interface: <none>

# Skip flow packets after this many bytes.
#cutoff: <infinity>

# Number of concurrent flows to track.
max-flows: 1048576

# Maximum flow lifetime before eviction.
max-flow-age: 60

# Flow table expiration interval.
flow-expiry: 10

# Inverse factor by which to delay packets. For example, if 5, then for
# two packets spaced *t* seconds apart, the source will sleep for *t/5*
# seconds.
pseudo-realtime-factor: 0

# Snapshot length in bytes.
snaplen: 65535

# Disable computation of community id for every packet.
disable-community-id: false

# The `tenzir-ctl import test` command imports randomly generated events.
# Used for debugging and benchmarking only.
test:
# The PRNG seed.
seed: 0

# The `tenzir-ctl import zeek` command imports Zeek logs.
zeek:
# Flag to indicate whether the output should contain #open/#close tags.
# Zeek writes these tags in its logs such that users can gain insight
# when Zeek processed the corresponding data. By default, Tenzir
# does the same. Setting this flag to true skips printing these tags,
# which may help when fully deterministic output is desired.
disable-timestamp-tags: false

# The `tenzir-ctl status` command prints a JSON-formatted status summary of
# the node.
status:

# The timeout for components to report their status.
timeout: 10s

# Add more information to the output
detailed: false

# Include extra debug information
debug: false

# The below settings are internal to CAF, and aren't checked by Tenzir directly.
# Please be careful when changing these options. Note that some CAF options may
# be in conflict with Tenzir options, and are only listed here for completeness.
caf:

# Options affecting the internal scheduler.
scheduler:

# Accepted alternative: "sharing".
policy: stealing

# Configures whether the scheduler generates profiling output.
enable-profiling: false

# Output file for profiler data (only if profiling is enabled).
#profiling-output-file: </dev/null>

# Measurement resolution in milliseconds (only if profiling is enabled).
profiling-resolution: 100ms

# Forces a fixed number of threads if set. Defaults to the number of
# available CPU cores if starting a Tenzir node, or *2* for client commands.
#max-threads: <number of cores>

# Maximum number of messages actors can consume in one run.
max-throughput: 500

# When using "stealing" as scheduler policy.
work-stealing:

# Number of zero-sleep-interval polling attempts.
aggressive-poll-attempts: 100

# Frequency of steal attempts during aggressive polling.
aggressive-steal-interval: 10

# Number of moderately aggressive polling attempts.
moderate-poll-attempts: 500

# Frequency of steal attempts during moderate polling.
moderate-steal-interval: 5

# Sleep interval between poll attempts.
moderate-sleep-duration: 50us

# Frequency of steal attempts during relaxed polling.
relaxed-steal-interval: 1

# Sleep interval between poll attempts.
relaxed-sleep-duration: 10ms

stream:

# Maximum delay for partial batches.
max-batch-delay: 15ms

# Selects an implementation for credit computation.
# Accepted alternative: "size-based".
credit-policy: token-based

# When using "size-based" as credit-policy.
size-based-policy:

# Desired batch size in bytes.
bytes-per-batch: 32

# Maximum input buffer size in bytes.
buffer-capacity: 256

# Frequency of collecting batch sizes.
sampling-rate: 100

# Frequency of re-calibrations.
calibration-interval: 1

# Factor for discounting older samples.
smoothing-factor: 2.5

# When using "token-based" as credit-policy.
token-based-policy:

# Number of elements per batch.
batch-size: 1

# Max. number of elements in the input buffer.
buffer-size: 64

# Collecting metrics can be resource consuming. This section is used for
# filtering what should and what should not be collected
metrics-filters:

# Rules for actor based metrics filtering.
actors:

# List of selected actors for run-time metrics.
includes: []

# List of excluded actors from run-time metrics.
excludes: []

# User-defined operators.
operators:
# The Zeek operator is an example that takes raw bytes in the form of a
# PCAP and then parses Zeek's output via the `zeek-json` format to generate
# a stream of events.
zeek:
shell "zeek -r - LogAscii::output_to_stdout=T
JSONStreaming::disable_default_logs=T
JSONStreaming::enable_log_rotation=F
json-streaming-logs"
| read zeek-json
# The Suricata operator is analogous to the above Zeek example, with the
# difference that we are using Suricata. The command line configures
# Suricata such that it reads PCAP on stdin and produces EVE JSON logs on
# stdout, which we then parse with the `suricata` format.
suricata:
shell "suricata -r /dev/stdin
--set outputs.1.eve-log.filename=/dev/stdout
--set logging.outputs.0.console.enabled=no"
| read suricata