Configuration #

CoreOptions #

Core options for paimon.

Key	Default	Type	Description
aggregation.remove-record-on-delete	false	Boolean	Whether to remove the whole row in aggregation engine when -D records are received.
add-column-before-partition	false	Boolean	If true, when adding a new column without specifying a position, the column will be placed before the first partition column instead of at the end of the schema. This only takes effect for partitioned tables.
alter-column-null-to-not-null.disabled	true	Boolean	If true, it disables altering column type from null to not null. Default is true. Users can disable this option to explicitly convert null column type to not null.
async-file-write	true	Boolean	Whether to enable asynchronous IO writing when writing files.
auto-create	false	Boolean	Whether to create underlying storage when reading and writing the table.
blob-as-descriptor	false	Boolean	Write blob field using blob descriptor rather than blob bytes.
blob-descriptor-field	(none)	String	Comma-separated BLOB field names to store as serialized BlobDescriptor bytes inline in data files.
blob-external-storage-field	(none)	String	Comma-separated BLOB field names (must be a subset of 'blob-descriptor-field') whose raw data will be written to external storage at write time. The external storage path is configured via 'blob-external-storage-path'. Orphan file cleanup is not applied to that path.
blob-external-storage-path	(none)	String	The external storage path where raw BLOB data from fields configured by 'blob-external-storage-field' is written at write time. Orphan file cleanup is not applied to this path.
blob-field	(none)	String	Specifies column names that should be stored as blob type. This is used when you want to treat a BYTES column as a BLOB.
blob.split-by-file-size	(none)	Boolean	Whether to consider blob file size as a factor when performing scan splitting.
blob.target-file-size	(none)	MemorySize	Target size of a blob file. Default is value of TARGET_FILE_SIZE.
bucket	-1	Integer	Bucket number for file store. It should either be equal to -1 (dynamic bucket mode), -2 (postpone bucket mode), or it must be greater than 0 (fixed bucket mode).
bucket-append-ordered	true	Boolean	Whether to ignore the order of the buckets when reading data from an append-only table.
bucket-function.type	default	Enum	The bucket function for paimon bucket. Possible values: "default": The default bucket function which will use arithmetic: bucket_id = Math.abs(hash_bucket_binary_row % numBuckets) to get bucket. "mod": The modulus bucket function which will use modulus arithmetic: bucket_id = Math.floorMod(bucket_key_value, numBuckets) to get bucket. Note: the bucket key must be a single field of INT or BIGINT datatype. "hive": The hive bucket function which will use hive-compatible hash arithmetic to get bucket.
bucket-key	(none)	String	Specify the paimon distribution policy. Data is assigned to each bucket according to the hash value of bucket-key. If you specify multiple fields, delimiter is ','. If not specified, the primary key will be used; if there is no primary key, the full row will be used.
cache-page-size	64 kb	MemorySize	Memory page size for caching.
chain-table.enabled	false	Boolean	Whether to enable chain table.
changelog-file.compression	(none)	String	Changelog file compression.
changelog-file.format	(none)	String	Specify the message format of changelog files, currently parquet, avro and orc are supported.
changelog-file.prefix	"changelog-"	String	Specify the file name prefix of changelog files.
changelog-file.stats-mode	(none)	String	Changelog file metadata stats collection. none, counts, truncate(16) and full are available.
changelog-producer	none	Enum	Whether to double write to a changelog file. This changelog file keeps the details of data changes, it can be read directly during stream reads. This can be applied to tables with primary keys. Possible values: "none": No changelog file. "input": Double write to a changelog file when flushing memory table, the changelog is from input. "full-compaction": Generate changelog files with each full compaction. "lookup": Generate changelog files through 'lookup' compaction.
changelog-producer.row-deduplicate	false	Boolean	Whether to generate -U, +U changelog for the same record. This configuration is only valid when the changelog-producer is lookup or full-compaction.
changelog-producer.row-deduplicate-ignore-fields	(none)	String	Fields that are ignored for comparison while generating -U, +U changelog for the same record. This configuration is only valid when changelog-producer.row-deduplicate is true.
changelog.num-retained.max	(none)	Integer	The maximum number of completed changelog to retain. Should be greater than or equal to the minimum number.
changelog.num-retained.min	(none)	Integer	The minimum number of completed changelog to retain. Should be greater than or equal to 1.
changelog.time-retained	(none)	Duration	The maximum time of completed changelog to retain.
clustering.columns	(none)	String	Specifies the column name(s) used for comparison during range partitioning, in the format 'columnName1,columnName2'. If not set or set to an empty string, it indicates that the range partitioning feature is not enabled. This option will be effective only for append table without primary keys and batch execution mode.
clustering.history-partition.idle-to-full-sort	(none)	Duration	The duration after which a partition without new updates is considered a historical partition. Historical partitions will be automatically fully clustered during the cluster operation.
clustering.history-partition.limit	5	Integer	The limit of history partition number for automatically performing full clustering.
clustering.incremental	false	Boolean	Whether to enable incremental clustering.
clustering.incremental.optimize-write	false	Boolean	Whether to perform clustering before the write phase when incremental clustering is enabled.
clustering.strategy	"auto"	String	Specifies the comparison algorithm used for range partitioning, including 'zorder', 'hilbert', and 'order', corresponding to the z-order curve algorithm, hilbert curve algorithm, and basic type comparison algorithm, respectively. When not configured, it will automatically determine the algorithm based on the number of columns in 'clustering.columns'. 'order' is used for 1 column, 'zorder' for less than 5 columns, and 'hilbert' for 5 or more columns.
commit.callback.#.param	(none)	String	Parameter string for the constructor of class #. Callback class should parse the parameter by itself.
commit.callbacks	(none)	String	A list of commit callback classes to be called after a successful commit. Class names are connected with comma (example: com.test.CallbackA,com.sample.CallbackB).
commit.discard-duplicate-files	false	Boolean	Whether to discard duplicate files in commit.
commit.force-compact	false	Boolean	Whether to force a compaction before commit.
commit.force-create-snapshot	false	Boolean	In streaming job, whether to force creating snapshot when there is no data in this write-commit phase.
commit.max-retries	10	Integer	Maximum number of retries when commit failed.
commit.max-retry-wait	10 s	Duration	Max retry wait time when commit failed.
commit.min-retry-wait	10 ms	Duration	Min retry wait time when commit failed.
commit.strict-mode.last-safe-snapshot	(none)	Long	If set, the committer will check whether there are snapshots from other commit users starting from the snapshot after this one. If a COMPACT / OVERWRITE snapshot is found, or an APPEND snapshot which committed files to fixed bucket is found, the commit will be aborted. If the value of this option is -1, the committer will not check for its first commit.
commit.timeout	(none)	Duration	Timeout duration of retry when commit failed.
commit.user-prefix	(none)	String	Specifies the commit user prefix.
compaction.delete-ratio-threshold	0.2	Double	Ratio of the deleted rows in a data file to be forced compacted for append-only table.
compaction.force-rewrite-all-files	false	Boolean	Whether to force pick all files for a full compaction. Usually seen in a compaction task to external paths.
compaction.force-up-level-0	false	Boolean	If set to true, compaction strategy will always include all level 0 files in candidates.
compaction.incremental-size-threshold	(none)	MemorySize	When incremental size is bigger than this threshold, force a full compaction.
compaction.max-size-amplification-percent	200	Integer	The size amplification is defined as the amount (in percentage) of additional storage needed to store a single byte of data in the merge tree for changelog mode table.
compaction.min.file-num	5	Integer	For file set [f_0,...,f_N], the minimum file number to trigger a compaction for append-only table.
compaction.offpeak-ratio	0	Integer	Allows you to set a different (by default, more aggressive) percentage ratio for determining whether larger sorted runs are included in compactions during off-peak hours. Works in the same way as compaction.size-ratio. Only applies if compaction.offpeak.start.hour and compaction.offpeak.end.hour are also enabled. For instance, if your cluster experiences low pressure between 2 AM and 6 PM, you can configure `compaction.offpeak.start.hour=2` and `compaction.offpeak.end.hour=18` to define this period as off-peak hours. During these hours, you can increase the off-peak compaction ratio (e.g. `compaction.offpeak-ratio=20`) to enable more aggressive data compaction.
compaction.offpeak.end.hour	-1	Integer	The end of off-peak hours, expressed as an integer between 0 and 23, exclusive. Set to -1 to disable off-peak.
compaction.offpeak.start.hour	-1	Integer	The start of off-peak hours, expressed as an integer between 0 and 23, inclusive. Set to -1 to disable off-peak.
compaction.optimization-interval	(none)	Duration	Implying how often to perform an optimization compaction, this configuration is used to ensure the query timeliness of the read-optimized system table.
compaction.size-ratio	1	Integer	Percentage flexibility while comparing sorted run size for changelog mode table. If the candidate sorted run(s) size is 1% smaller than the next sorted run's size, then include next sorted run into this candidate set.
compaction.total-size-threshold	(none)	MemorySize	When total size is smaller than this threshold, force a full compaction.
consumer-id	(none)	String	Consumer id for recording the offset of consumption in the storage.
consumer.expiration-time	(none)	Duration	The expiration interval of consumer files. A consumer file will be expired if its lifetime after last modification is over this value.
consumer.ignore-progress	false	Boolean	Whether to ignore consumer progress for the newly started job.
consumer.mode	exactly-once	Enum	Specify the consumer consistency mode for table. Possible values: "exactly-once": Readers consume data at snapshot granularity, and strictly ensure that the snapshot-id recorded in the consumer is the snapshot-id + 1 that all readers have exactly consumed. "at-least-once": Each reader consumes snapshots at a different rate, and the snapshot with the slowest consumption progress among all readers will be recorded in the consumer.
continuous.discovery-interval	10 s	Duration	The discovery interval of continuous reading.
cross-partition-upsert.bootstrap-parallelism	10	Integer	The parallelism for bootstrap in a single task for cross partition upsert.
cross-partition-upsert.index-ttl	(none)	Duration	The TTL in rocksdb index for cross partition upsert (primary keys do not contain all partition fields). This can avoid maintaining too many indexes, which would lead to worse and worse performance, but please note that this may also cause data duplication.
data-evolution.enabled	false	Boolean	Whether to enable data evolution for row tracking table.
data-file.external-paths	(none)	String	The external paths where the data of this table will be written, multiple elements separated by commas.
data-file.external-paths.specific-fs	(none)	String	The specific file system of the external path when data-file.external-paths.strategy is set to specific-fs, should be the prefix scheme of the external path, now supported are s3 and oss.
data-file.external-paths.strategy	none	Enum	The strategy of selecting an external path when writing data. Possible values: "none": Do not choose any external storage, data will still be written to the default warehouse path. "specific-fs": Select a specific file system as the external path. Currently supported are S3 and OSS. "round-robin": When writing a new file, a path is chosen from data-file.external-paths in turn. "entropy-inject": When writing a new file, a path is chosen based on the hash value of the file's content. "weight-robin": When writing a new file, a path is chosen based on configured weights.
data-file.external-paths.weights	(none)	String	The weights for external paths when data-file.external-paths.strategy is set to weight-robin. Format: 'weight1,weight2,...' with weights corresponding to paths in data-file.external-paths by order. Example: '10,5,15' means first path has weight 10, second 5, third 15. Weights must be positive integers.
data-file.path-directory	(none)	String	Specify the path directory of data files.
data-file.prefix	"data-"	String	Specify the file name prefix of data files.
data-file.thin-mode	false	Boolean	Enable data file thin mode to avoid duplicate columns storage.
delete.force-produce-changelog	false	Boolean	Force produce changelog in delete sql, or you can use 'streaming-read-overwrite' to read changelog from overwrite commit.
deletion-vector.index-file.target-size	2 mb	MemorySize	The target size of deletion vector index file.
deletion-vectors.bitmap64	false	Boolean	Enable 64 bit bitmap implementation. Note that only 64 bit bitmap implementation is compatible with Iceberg.
deletion-vectors.enabled	false	Boolean	Whether to enable deletion vectors mode. In this mode, index files containing deletion vectors are generated when data is written, which marks the data for deletion. During read operations, by applying these index files, merging can be avoided.
deletion-vectors.modifiable	false	Boolean	Whether to enable modifying deletion vectors mode.
disable-explicit-type-casting	false	Boolean	If true, it disables explicit type casting. For example, it disables converting LONG type to INT type. Users can enable this option to disable explicit type casting.
dynamic-bucket.assigner-parallelism	(none)	Integer	Parallelism of assigner operator for dynamic bucket mode, it is related to the number of initialized bucket, too small will lead to insufficient processing speed of assigner.
dynamic-bucket.initial-buckets	(none)	Integer	Initial buckets for a partition in assigner operator for dynamic bucket mode.
dynamic-bucket.max-buckets	-1	Integer	Max buckets for a partition in dynamic bucket mode. It should either be equal to -1 (unlimited), or it must be greater than 0 (fixed upper bound).
dynamic-bucket.target-row-num	2000000	Long	If the bucket is -1, a primary key table is in dynamic bucket mode, and this option controls the target row number for one bucket.
dynamic-partition-overwrite	true	Boolean	Whether only overwrite dynamic partition when overwriting a partitioned table with dynamic partition columns. Works only when the table has partition keys.
end-input.check-partition-expire	false	Boolean	Optional endInput check partition expire used in case of batch mode or bounded stream.
fields.default-aggregate-function	(none)	String	Default aggregate function of all fields for partial-update and aggregate merge function.
file-index.in-manifest-threshold	500 bytes	MemorySize	The threshold to store file index bytes in manifest.
file-index.read.enabled	true	Boolean	Whether to enable reading file index.
file-operation.thread-num	(none)	Integer	The maximum number of concurrent file operations. By default is the number of processors available to the Java virtual machine.
file-reader-async-threshold	10 mb	MemorySize	The threshold for read file async.
file.block-size	(none)	MemorySize	File block size of format, default value of orc stripe is 64 MB, and parquet row group is 128 MB.
file.compression	"zstd"	String	Default file compression. For faster read and write, it is recommended to use zstd.
file.compression.per.level		Map	Define different compression policies for different level, you can add the conf like this: 'file.compression.per.level' = '0:lz4,1:zstd'.
file.compression.zstd-level	1	Integer	Default file compression zstd level. For higher compression rates, it can be configured to 9, but the read and write speed will significantly decrease.
file.format	"parquet"	String	Specify the message format of data files, currently orc, parquet and avro are supported.
file.format.per.level		Map	Define different file format for different level, you can add the conf like this: 'file.format.per.level' = '0:avro,3:parquet', if the file format for level is not provided, the default format which is set by `file.format` will be used.
file.suffix.include.compression	false	Boolean	Whether to add file compression type in the file name of data file and changelog file.
force-lookup	false	Boolean	Whether to force the use of lookup for compaction.
format-table.commit-hive-sync-url	(none)	String	Format table commit hive sync uri.
format-table.file.compression	(none)	String	Format table file compression.
format-table.implementation	paimon	Enum	Format table uses paimon or engine. Possible values: "paimon": Paimon format table implementation. "engine": Engine format table implementation.
format-table.partition-path-only-value	false	Boolean	Format table file path only contain partition value.
full-compaction.delta-commits	(none)	Integer	For streaming write, full compaction will be constantly triggered after delta commits. For batch write, full compaction will be triggered with each commit as long as this value is greater than 0.
global-index.column-update-action	THROW_ERROR	Enum	Defines the action to take when an update modifies columns that are covered by a global index. Possible values: "THROW_ERROR" "DROP_PARTITION_INDEX"
global-index.enabled	true	Boolean	Whether to enable global index for scan.
global-index.external-path	(none)	String	Global index root directory, if not set, the global index files will be stored under the <table-root-directory>/index.
global-index.row-count-per-shard	100000	Long	Row count per shard for global index.
global-index.thread-num	(none)	Integer	The maximum number of concurrent scanners for global index. By default is the number of processors available to the Java virtual machine.
ignore-delete	false	Boolean	Whether to ignore delete records.
ignore-update-before	false	Boolean	Whether to ignore update-before records.
incremental-between	(none)	String	Read incremental changes between start snapshot (exclusive) and end snapshot (inclusive), for example, '5,10' means changes between snapshot 5 and snapshot 10.
incremental-between-scan-mode	auto	Enum	Scan kind when reading incremental changes between start snapshot (exclusive) and end snapshot (inclusive). Possible values: "auto": Scan changelog files for the table which produces changelog files. Otherwise, scan newly changed files. "delta": Scan newly changed files between snapshots. "changelog": Scan changelog files between snapshots. "diff": Get diff by comparing data of end snapshot with data of start snapshot.
incremental-between-tag-to-snapshot	false	Boolean	Whether to read incremental changes between the snapshot corresponding to the tag.
incremental-between-timestamp	(none)	String	Read incremental changes between start timestamp (exclusive) and end timestamp (inclusive), for example, 't1,t2' means changes between timestamp t1 and timestamp t2.
incremental-to-auto-tag	(none)	String	Used to specify the end tag (inclusive), and Paimon will find an earlier tag and return changes between them. If the tag doesn't exist or the earlier tag doesn't exist, return empty. This option requires 'tag.creation-period' and 'tag.period-formatter' configured.
index-file-in-data-file-dir	false	Boolean	Whether to store index files in the data file directory.
local-merge-buffer-size	(none)	MemorySize	Local merge will buffer and merge input records before they're shuffled by bucket and written into sink. The buffer will be flushed when it is full. Mainly to resolve data skew on primary keys. We recommend starting with 64 mb when trying out this feature.
local-sort.max-num-file-handles	128	Integer	The maximal fan-in for external merge sort. It limits the number of file handles. If it is too small, may cause intermediate merging. But if it is too large, it will cause too many files opened at the same time, consume memory and lead to random reading.
lookup-compact	RADICAL	Enum	Lookup compact mode used for lookup compaction. Possible values: "RADICAL" "GENTLE"
lookup-compact.max-interval	(none)	Integer	The max interval for a gentle mode lookup compaction to be triggered. For every interval, a forced lookup compaction will be performed to flush L0 files to higher level. This option is only valid when lookup-compact mode is gentle.
lookup-wait	true	Boolean	When need to lookup, commit will wait for compaction by lookup.
lookup.cache-file-retention	1 h	Duration	The cached files retention time for lookup. After the file expires, if there is a need for access, it will be re-read from the DFS to build an index on the local disk.
lookup.cache-max-disk-size	infinite	MemorySize	Max disk size for lookup cache, you can use this option to limit the use of local disks.
lookup.cache-max-memory-size	256 mb	MemorySize	Max memory size for lookup cache.
lookup.cache-spill-compression	"zstd"	String	Spill compression for lookup cache, currently zstd, none, lz4 and lzo are supported.
lookup.cache.bloom.filter.enabled	true	Boolean	Whether to enable the bloom filter for lookup cache.
lookup.cache.bloom.filter.fpp	0.05	Double	Define the default false positive probability for lookup cache bloom filters.
lookup.cache.high-priority-pool-ratio	0.25	Double	The fraction of cache memory that is reserved for high-priority data like index, filter.
lookup.hash-load-factor	0.75	Float	The index load factor for lookup.
lookup.merge-buffer-size	8 mb	MemorySize	Buffer memory size for one key merging in lookup.
lookup.merge-records-threshold	1024	Integer	Threshold for merging records to binary buffer in lookup.
lookup.remote-file.enabled	false	Boolean	Whether to enable the remote file for lookup.
lookup.remote-file.level-threshold	-2147483648	Integer	Level threshold of lookup to generate remote lookup files. Level files below this threshold will not generate remote lookup files.
manifest.compression	"zstd"	String	Default file compression for manifest.
manifest.delete-file-drop-stats	false	Boolean	For DELETE manifest entry in manifest file, drop stats to reduce memory and storage. Default value is false only for compatibility of old reader.
manifest.format	"avro"	String	Specify the message format of manifest files.
manifest.full-compaction-threshold-size	16 mb	MemorySize	The size threshold for triggering full compaction of manifest.
manifest.merge-min-count	30	Integer	To avoid frequent manifest merges, this parameter specifies the minimum number of ManifestFileMeta to merge.
manifest.target-file-size	8 mb	MemorySize	Suggested file size of a manifest file.
merge-engine	deduplicate	Enum	Specify the merge engine for table with primary key. Possible values: "deduplicate": De-duplicate and keep the last row. "partial-update": Partial update non-null fields. "aggregation": Aggregate fields with same primary key. "first-row": De-duplicate and keep the first row.
metadata.stats-dense-store	true	Boolean	Whether to store statistic densely in metadata (manifest files), which will significantly reduce the storage size of metadata when the none statistic mode is set. Note, when this mode is enabled with 'metadata.stats-mode:none', the Paimon sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.
metadata.stats-keep-first-n-columns	-1	Integer	Define how many columns' stats are kept in metadata file from front to end. Default value '-1' means ignoring this config.
metadata.stats-mode	"truncate(16)"	String	The mode of metadata stats collection. none, counts, truncate(16) and full are available. "none": means disable the metadata stats collection. "counts": means only collect the null count. "full": means collect the null count, min/max value. "truncate(16)": means collect the null count, min/max value with truncated length of 16. Field level stats mode can be specified by fields.{field_name}.stats-mode.
metadata.stats-mode.per.level		Map	Define different 'metadata.stats-mode' for different level, you can add the conf like this: 'metadata.stats-mode.per.level' = '0:none', if the metadata.stats-mode for level is not provided, the default mode which is set by `metadata.stats-mode` will be used.
metastore.partitioned-table	false	Boolean	Whether to create this table as a partitioned table in metastore. For example, if you want to list all partitions of a Paimon table in Hive, you need to create this table as a partitioned table in Hive metastore. This config option does not affect the default filesystem metastore.
metastore.tag-to-partition	(none)	String	Whether to create this table as a partitioned table for mapping non-partitioned table tags in metastore. This allows the Hive engine to view this table in a partitioned table view and use partitioning field to read specific partitions (specific tags).
metastore.tag-to-partition.preview	none	Enum	Whether to preview tag of generated snapshots in metastore. This allows the Hive engine to query specific tag before creation. Possible values: "none": No automatically created tags. "process-time": Based on the time of the machine, create TAG once the processing time passes period time plus delay. "watermark": Based on the watermark of the input, create TAG once the watermark passes period time plus delay. "batch": In the batch processing scenario, the tag corresponding to the current snapshot is generated after the task is completed.
num-levels	(none)	Integer	Total level number, for example, there are 3 levels, including 0,1,2 levels.
num-sorted-run.compaction-trigger	5	Integer	The sorted run number to trigger compaction. Includes level0 files (one file one sorted run) and high-level runs (one level one sorted run).
num-sorted-run.stop-trigger	(none)	Integer	The number of sorted runs that trigger the stopping of writes, the default value is 'num-sorted-run.compaction-trigger' + 3.
overwrite-upgrade	true	Boolean	Whether to try upgrading the data files after overwriting a primary key table.
page-size	64 kb	MemorySize	Memory page size.
parquet.enable.dictionary	(none)	Integer	Turn off the dictionary encoding for all fields in parquet.
partial-update.remove-record-on-delete	false	Boolean	Whether to remove the whole row in partial-update engine when -D records are received.
partial-update.remove-record-on-sequence-group	(none)	String	When -D records of the given sequence groups are received, remove the whole row.
partition	(none)	String	Define partition by table options, cannot define partition on DDL and table options at the same time.
partition.default-name	"__DEFAULT_PARTITION__"	String	The default partition name in case the dynamic partition column value is null/empty string.
partition.end-input-to-done	false	Boolean	Whether to mark the done status to indicate that the data is ready when end input.
partition.expiration-batch-size	(none)	Integer	The batch size of partition expiration. By default, all partitions to be expired will be expired together, which may cause a risk of out-of-memory. Use this parameter to divide partition expiration process and mitigate memory pressure.
partition.expiration-check-interval	1 h	Duration	The check interval of partition expiration.
partition.expiration-max-num	100	Integer	The default deleted num of partition expiration.
partition.expiration-strategy	"values-time"	String	The strategy determines how to extract the partition time and compare it with the current time. "values-time": This strategy compares the time extracted from the partition value with the current time. "update-time": This strategy compares the last update time of the partition with the current time.
partition.expiration-time	(none)	Duration	The expiration interval of a partition. A partition will be expired if its lifetime is over this value. Partition time is extracted from the partition value.
partition.idle-time-to-report-statistic	0 ms	Duration	Set a time duration when a partition has no new data after this time duration, start to report the partition statistics to hms.
partition.legacy-name	true	Boolean	The legacy partition name is using `toString` for all types. If false, using cast to string for all types.
partition.mark-done-action	"success-file"	String	Action to mark a partition done is to notify the downstream application that the partition has finished writing, the partition is ready to be read. 1. 'success-file': add '_success' file to directory. 2. 'done-partition': add 'xxx.done' partition to metastore. 3. 'mark-event': mark partition event to metastore. 4. 'http-report': report partition mark done to remote http server. 5. 'custom': use policy class to create a mark-partition policy. Multiple actions can be configured at the same time, for example: 'done-partition,success-file,mark-event,custom'.
partition.mark-done-action.custom.class	(none)	String	The partition mark done class that implements the PartitionMarkDoneAction interface. Only works with the custom mark-done-action.
partition.mark-done-action.http.params	(none)	String	Http client request parameters will be written to the request body, this can only be used by http-report partition mark done action.
partition.mark-done-action.http.url	(none)	String	Mark done action will report the partition to the remote http server. This can only be used by http-report partition mark done action.
partition.sink-strategy	NONE	Enum	This is only for partitioned append table or postpone pk table, and the purpose is to reduce small files and improve write performance. This repartitioning strategy reduces the number of partitions written by each task to as few as possible. none: Rebalanced or Forward partitioning, this is the default behavior, this strategy is suitable when the number of partitions you write in a batch is much smaller than write parallelism. hash: Hash the partitions value, this strategy is suitable when the number of partitions you write in a batch is greater than or equal to write parallelism. Possible values: "NONE" "HASH"
partition.timestamp-formatter	(none)	String	The formatter to format timestamp from string. It can be used with 'partition.timestamp-pattern' to create a formatter using the specified value. Default formatter is 'yyyy-MM-dd HH:mm:ss' and 'yyyy-MM-dd'. Supports multiple partition fields like '$year-$month-$day $hour:00:00'. The timestamp-formatter is compatible with Java's DateTimeFormatter.
partition.timestamp-pattern	(none)	String	You can specify a pattern to get a timestamp from partitions. The formatter pattern is defined by 'partition.timestamp-formatter'. By default, read from the first field. If the timestamp in the partition is a single field called 'dt', you can use '$dt'. If it is spread across multiple fields for year, month, day, and hour, you can use '$year-$month-$day $hour:00:00'. If the timestamp is in fields dt and hour, you can use '$dt $hour:00:00'.
postpone.batch-write-fixed-bucket	true	Boolean	Whether to write the data into fixed bucket for batch writing a postpone bucket table.
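postpone.batch-write-fixed-bucket.max-parallelism	2048	Integer	The maximum parallelism used when writing data into fixed bucket for batch writing a postpone bucket table (see 'postpone.batch-write-fixed-bucket').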
postpone.default-bucket-num	1	Integer	Bucket number for the partitions compacted for the first time in postpone bucket tables.
primary-key	(none)	String	Define primary key by table options, cannot define primary key on DDL and table options at the same time.
query-auth.enabled	false	Boolean	Enable query auth to give Catalog the opportunity to perform column level and row level permission validation on queries.
read.batch-size	1024	Integer	Read batch size for any file format if it supports.
record-level.expire-time	(none)	Duration	Record level expire time for primary key table, expiration happens in compaction, there is no strong guarantee to expire records in time. You must specify 'record-level.time-field' too.
record-level.time-field	(none)	String	Time field for record level expire. It supports the following types: `timestamps in seconds with INT`,`timestamps in seconds with BIGINT`, `timestamps in milliseconds with BIGINT` or `timestamp`.
row-tracking.enabled	false	Boolean	Whether to enable unique row id for append table.
rowkind.field	(none)	String	The field that generates the row kind for primary key table, the row kind determines which data is '+I', '-U', '+U' or '-D'.
scan.bounded.watermark	(none)	Long	End condition "watermark" for bounded streaming mode. Stream reading will end when a larger watermark snapshot is encountered.
scan.creation-time-millis	(none)	Long	Optional timestamp used in case of "from-creation-timestamp" scan mode.
scan.fallback-branch	(none)	String	When a batch job queries from a table, if a partition does not exist in the current branch, the reader will try to get this partition from this fallback branch.
scan.fallback-delta-branch	(none)	String	When a batch job queries from a chain table, if a partition does not exist in either main or snapshot branch, the reader will try to get this partition from chain snapshot and delta branch together.
scan.fallback-snapshot-branch	(none)	String	When a batch job queries from a chain table, if a partition does not exist in the main branch, the reader will try to get this partition from chain snapshot branch.
scan.file-creation-time-millis	(none)	Long	After configuring this time, only the data files created after this time will be read. It is independent of snapshots, but it is imprecise filtering (depending on whether or not compaction occurs).
scan.ignore-corrupt-files	false	Boolean	Ignore corrupt files while scanning.
scan.ignore-lost-files	false	Boolean	Ignore lost files while scanning.
scan.manifest.parallelism	(none)	Integer	The parallelism of scanning manifest files; the default value is the number of CPU processors. Note: Scaling up this parameter will increase memory usage while scanning manifest files. Consider decreasing it when encountering an out-of-memory exception while scanning.
scan.max-splits-per-task	10	Integer	The maximum number of splits that should be cached for one task while scanning. If the number of splits cached in the enumerator is greater than the number of tasks multiplied by this value, the scanner will pause scanning.
scan.mode	default	Enum	Specify the scanning behavior of the source. Possible values: "default": Determines actual startup mode according to other table properties. If "scan.timestamp-millis" is set the actual startup mode will be "from-timestamp", and if "scan.snapshot-id" or "scan.tag-name" is set the actual startup mode will be "from-snapshot". Otherwise the actual startup mode will be "latest-full". "latest-full": For streaming sources, produces the latest snapshot on the table upon first startup, and continue to read the latest changes. For batch sources, just produce the latest snapshot but does not read new changes. "full": Deprecated. Same as "latest-full". "latest": For streaming sources, continuously reads latest changes without producing a snapshot at the beginning. For batch sources, behaves the same as the "latest-full" startup mode. "compacted-full": For streaming sources, produces a snapshot after the latest compaction on the table upon first startup, and continue to read the latest changes. For batch sources, just produce a snapshot after the latest compaction but does not read new changes. Snapshots of full compaction are picked when scheduled full-compaction is enabled. "from-timestamp": For streaming sources, continuously reads changes starting from timestamp specified by "scan.timestamp-millis", without producing a snapshot at the beginning. For batch sources, produces a snapshot at timestamp specified by "scan.timestamp-millis" but does not read new changes. "from-creation-timestamp": For streaming sources and batch sources, if the timestamp specified by "scan.creation-time-millis" is within the range of the earliest snapshot and the latest snapshot, the mode is from-snapshot, using the snapshot that is equal to or later than the timestamp. If the timestamp is earlier than the earliest snapshot or later than the latest snapshot, the mode is from-file-creation-time. "from-file-creation-time": For streaming and batch sources, consumes a snapshot and filters the data files by creation time. For streaming sources, this happens upon first startup, and the source then continues to read the latest changes. "from-snapshot": For streaming sources, continuously reads changes starting from snapshot specified by "scan.snapshot-id", without producing a snapshot at the beginning. For batch sources, produces a snapshot specified by "scan.snapshot-id" or "scan.tag-name" but does not read new changes. "from-snapshot-full": For streaming sources, produces from snapshot specified by "scan.snapshot-id" on the table upon first startup, and continuously reads changes. For batch sources, produces a snapshot specified by "scan.snapshot-id" but does not read new changes. "incremental": Read incremental changes between start and end snapshot or timestamp.
scan.plan-sort-partition	false	Boolean	Whether to sort plan files by partition fields, this allows you to read according to the partition order, even if your partition writes are out of order. It is recommended that you use this for streaming read of the 'append-only' table. By default, streaming read will read the full snapshot first. In order to avoid the disorder reading for partitions, you can open this option.
scan.snapshot-id	(none)	Long	Optional snapshot id used in case of "from-snapshot" or "from-snapshot-full" scan mode
scan.tag-name	(none)	String	Optional tag name used in case of "from-snapshot" scan mode.
scan.timestamp	(none)	String	Optional timestamp used in case of "from-timestamp" scan mode, it will be automatically converted to timestamp in unix milliseconds, use local time zone
scan.timestamp-millis	(none)	Long	Optional timestamp used in case of "from-timestamp" scan mode. If there is no snapshot earlier than this time, the earliest snapshot will be chosen.
scan.watermark	(none)	Long	Optional watermark used in case of "from-snapshot" scan mode. If there is no snapshot later than this watermark, an exception will be thrown.
sequence.field	(none)	String	The field that generates the sequence number for primary key table, the sequence number determines which data is the most recent.
sequence.field.sort-order	ascending	Enum	Specify the order of sequence.field. Possible values: "ascending": specifies sequence.field sort order is ascending. "descending": specifies sequence.field sort order is descending.
sink.process-time-zone	(none)	String	The time zone to parse the long process time to TIMESTAMP value. The default value is JVM's default time zone. If you want to specify a time zone, you should either set a full name such as 'America/Los_Angeles' or a custom zone id such as 'GMT-08:00'. This option currently is used for extract tag name.
sink.watermark-time-zone	"UTC"	String	The time zone to parse the long watermark value to TIMESTAMP value. The default value is 'UTC', which means the watermark is defined on TIMESTAMP column or not defined. If the watermark is defined on TIMESTAMP_LTZ column, the time zone of watermark is user configured time zone, the value should be the user configured local time zone. The option value is either a full name such as 'America/Los_Angeles', or a custom timezone id such as 'GMT-08:00'.
snapshot.clean-empty-directories	false	Boolean	Whether to try to clean empty directories when expiring snapshots, if enabled, please note: hdfs: may print exceptions in NameNode. oss/s3: may cause performance issue.
snapshot.expire.execution-mode	sync	Enum	Specifies the execution mode of expire. Possible values: "sync": Execute expire synchronously. If there are too many files, it may take a long time and block stream processing. "async": Execute expire asynchronously. If the generation of snapshots is greater than the deletion, there will be a backlog of files.
snapshot.expire.limit	50	Integer	The maximum number of snapshots allowed to expire at a time.
snapshot.ignore-empty-commit	(none)	Boolean	Whether to ignore empty commits.
snapshot.num-retained.max	infinite	Integer	The maximum number of completed snapshots to retain. Should be greater than or equal to the minimum number.
snapshot.num-retained.min	10	Integer	The minimum number of completed snapshots to retain. Should be greater than or equal to 1.
snapshot.time-retained	1 h	Duration	The maximum time of completed snapshots to retain.
snapshot.watermark-idle-timeout	(none)	Duration	In watermarking, if a source remains idle beyond the specified timeout duration, it triggers snapshot advancement and facilitates tag creation.
sort-compaction.local-sample.magnification	1000	Integer	The magnification of local sample for sort-compaction. The size of local sample is sink parallelism * magnification.
sort-compaction.range-strategy	QUANTITY	Enum	The range strategy of sort compaction, the default value is quantity. If the data size allocated for the sorting task is uneven, which may lead to performance bottlenecks, the config can be set to size. Possible values: "SIZE" "QUANTITY"
sort-engine	loser-tree	Enum	Specify the sort engine for table with primary key. Possible values: "min-heap": Use min-heap for multiway sorting. "loser-tree": Use loser-tree for multiway sorting. Compared with heapsort, loser-tree has fewer comparisons and is more efficient.
sort-spill-buffer-size	64 mb	MemorySize	Amount of data to spill records to disk in spilled sort.
sort-spill-threshold	(none)	Integer	If the maximum number of sort readers exceeds this value, a spill will be attempted. This prevents too many readers from consuming too much memory and causing OOM.
source.split.open-file-cost	4 mb	MemorySize	Open file cost of a source file. It is used to avoid reading too many files with a source split, which can be very slow.
source.split.target-size	128 mb	MemorySize	Target size of a source split when scanning a bucket.
spill-compression	"zstd"	String	Compression for spill, currently lz4, lzo and zstd are supported.
spill-compression.zstd-level	1	Integer	Default spill compression zstd level. For higher compression rates, it can be configured to 9, but the read and write speed will significantly decrease.
streaming-read-append-overwrite	false	Boolean	Whether to read the delta from append table's overwrite commit in streaming mode.
streaming-read-overwrite	false	Boolean	Whether to read the changes from overwrite in streaming mode. Cannot be set to true when changelog producer is full-compaction or lookup because it will read duplicated changes.
streaming.read.snapshot.delay	(none)	Duration	The delay duration of stream read when scan incremental snapshots.
table-read.sequence-number.enabled	false	Boolean	Whether to include the _SEQUENCE_NUMBER field when reading the audit_log or binlog system tables. This is only valid for primary key tables.
tag.automatic-completion	false	Boolean	Whether to automatically complete missing tags.
tag.automatic-creation	none	Enum	Whether to create tag automatically. And how to generate tags. Possible values: "none": No automatically created tags. "process-time": Based on the time of the machine, create TAG once the processing time passes period time plus delay. "watermark": Based on the watermark of the input, create TAG once the watermark passes period time plus delay. "batch": In the batch processing scenario, the tag corresponding to the current snapshot is generated after the task is completed.
tag.batch.customized-name	(none)	String	Use customized name when creating tags in Batch mode.
tag.callback.#.param	(none)	String	Parameter string for the constructor of class #. Callback class should parse the parameter by itself.
tag.callbacks	(none)	String	A list of commit callback classes to be called after a successful tag. Class names are connected with comma (example: com.test.CallbackA,com.sample.CallbackB).
tag.create-success-file	false	Boolean	Whether to create tag success file for new created tags.
tag.creation-delay	0 ms	Duration	How long is the delay after the period ends before creating a tag. This can allow some late data to enter the Tag.
tag.creation-period	daily	Enum	What frequency is used to generate tags. Possible values: "daily": Generate a tag every day. "hourly": Generate a tag every hour. "two-hours": Generate a tag every two hours.
tag.creation-period-duration	(none)	Duration	The period duration for tag auto create periods. If the user sets it, tag.creation-period would be invalid.
tag.default-time-retained	(none)	Duration	The default maximum time retained for newly created tags. It affects both auto-created tags and manually created (by procedure) tags.
tag.num-retained-max	(none)	Integer	The maximum number of tags to retain. It only affects auto-created tags.
tag.period-formatter	with_dashes	Enum	The date format for tag periods. Possible values: "with_dashes": Dates and hours with dashes, e.g., 'yyyy-MM-dd HH' "without_dashes": Dates and hours without dashes, e.g., 'yyyyMMdd HH' "without_dashes_and_spaces": Dates and hours without dashes and spaces, e.g., 'yyyyMMddHH'
tag.time-expire-enabled	true	Boolean	Whether to enable tag expiration by retained time.
target-file-size	(none)	MemorySize	Target size of a file. primary key table: the default value is 128 MB. append table: the default value is 256 MB.
type	table	Enum	Type of the table. Possible values: "table": Normal Paimon table. "format-table": A file format table refers to a directory that contains multiple files of the same format. "materialized-table": A materialized table combines normal Paimon table and materialized SQL. "object-table": An object table combines normal Paimon table and object location. "lance-table": A lance table, see 'https://lancedb.github.io/lance/'. "iceberg-table": An iceberg table, see 'https://iceberg.apache.org/'.
upsert-key	(none)	String	Define upsert key to do MERGE INTO when executing INSERT INTO, cannot be defined with primary key.
variant.inferShreddingSchema	false	Boolean	Whether to automatically infer the shredding schema when writing Variant columns.
variant.shredding.maxInferBufferRow	4096	Integer	Maximum number of rows to buffer for schema inference.
variant.shredding.maxSchemaDepth	50	Integer	Maximum traversal depth in Variant values during schema inference.
variant.shredding.maxSchemaWidth	300	Integer	Maximum number of shredded fields allowed in an inferred schema.
variant.shredding.minFieldCardinalityRatio	0.1	Double	Minimum fraction of rows that must contain a field for it to be shredded. Fields below this threshold will remain in the un-shredded Variant binary.
variant.shreddingSchema	(none)	String	The Variant shredding schema for writing.
visibility-callback.check-interval	10 s	Duration	The interval for checking visibility when visibility-callback enabled.
visibility-callback.enabled	false	Boolean	Whether to enable the visibility wait callback that waits for compaction to complete after commit. This is useful for primary key tables with deletion vectors or postpone bucket mode to ensure data visibility, only used for batch mode or bounded stream.
visibility-callback.timeout	30 min	Duration	The maximum time to wait for compaction to complete when visibility callback is enabled. If the timeout is reached, an exception will be thrown.
write-buffer-for-append	false	Boolean	This option only works for append-only table. Whether the write use write buffer to avoid out-of-memory error.
write-buffer-size	256 mb	MemorySize	Amount of data to build up in memory before converting to a sorted on-disk file.
write-buffer-spill.max-disk-size	infinite	MemorySize	The max disk to use for write buffer spill. This only works when the write buffer spill is enabled.
write-buffer-spillable	true	Boolean	Whether the write buffer can be spillable.
write-max-writers-to-spill	10	Integer	When in batch append inserting, if the writer number is greater than this option, we open the buffer cache and spill function to avoid out-of-memory.
write-only	false	Boolean	If set to true, compactions and snapshot expiration will be skipped. This option is used along with dedicated compact jobs.
write.batch-memory	128 mb	MemorySize	Write batch memory for any file format if it supports.
write.batch-size	1024	Integer	Write batch size for any file format if it supports.
zorder.var-length-contribution	8	Integer	The bytes of types (CHAR, VARCHAR, BINARY, VARBINARY) devote to the zorder sort.

CatalogOptions #

Options for paimon catalog.

Key	Default	Type	Description
cache-enabled	true	Boolean	Controls whether the catalog will cache databases, tables, manifests and partitions.
cache.deletion-vectors.max-num	100000	Integer	Controls the maximum number of deletion vector meta that can be cached.
cache.expire-after-access	10 min	Duration	Cache expiration policy: marks cache entries to expire after a specified duration has passed since their last access.
cache.expire-after-write	30 min	Duration	Cache expiration policy: marks cache entries to expire after a specified duration has passed since their last refresh.
cache.manifest.max-memory	(none)	MemorySize	Controls the maximum memory to cache manifest content.
cache.manifest.small-file-memory	128 mb	MemorySize	Controls the cache memory to cache small manifest files.
cache.manifest.small-file-threshold	1 mb	MemorySize	Controls the threshold of small manifest file.
cache.partition.max-num	0	Long	Controls the max number for which partitions in the catalog are cached.
cache.snapshot.max-num-per-table	20	Integer	Controls the max number of snapshots per table that are cached in the catalog.
case-sensitive	(none)	Boolean	Indicates whether this catalog is case-sensitive.
client-pool-size	2	Integer	Configure the size of the connection pool.
file-io.allow-cache	true	Boolean	Whether to allow static cache in file io implementation. If not allowed, this means that there may be a large number of FileIO instances generated, enabling caching can lead to resource leakage.
format-table.enabled	true	Boolean	Whether to support format tables, format table corresponds to a regular csv, parquet or orc table, allowing read and write operations. However, during these processes, it does not connect to the metastore; hence, newly added partitions will not be reflected in the metastore and need to be manually added as separate partition operations.
lock-acquire-timeout	8 min	Duration	The maximum time to wait for acquiring the lock.
lock-check-max-sleep	8 s	Duration	The maximum sleep time when retrying to check the lock.
lock.enabled	(none)	Boolean	Enable Catalog Lock.
lock.type	(none)	String	The Lock Type for Catalog, such as 'hive', 'zookeeper'.
metastore	"filesystem"	String	Metastore of paimon catalog, supports filesystem, hive and jdbc.
resolving-file-io.enabled	false	Boolean	Whether to enable resolving fileio, when this option is enabled, in conjunction with the table's property data-file.external-paths, Paimon can read and write to external storage paths, such as OSS or S3. In order to access these external paths correctly, you also need to configure the corresponding access key and secret key.
sync-all-properties	true	Boolean	Sync all table properties to hive metastore
table.type	managed	Enum	Type of table. Possible values: "managed": Paimon owned table where the entire lifecycle of the table data is managed. "external": The table where Paimon has loose coupling with the data stored in external locations.
uri	(none)	String	Uri of metastore server.
warehouse	(none)	String	The warehouse root path of catalog.

HiveCatalogOptions #

Options for Hive catalog.

Key	Default	Type	Description
client-pool-cache.eviction-interval-ms	300000	Long	Setting the client's pool cache eviction interval(ms).
client-pool-cache.keys	(none)	String	Specify client cache key, multiple elements separated by commas. "ugi": the Hadoop UserGroupInformation instance that represents the current user using the cache. "user_name" similar to UGI but only includes the user's name determined by UserGroupInformation#getUserName. "conf": name of an arbitrary configuration. The value of the configuration will be extracted from catalog properties and added to the cache key. A conf element should start with a "conf:" prefix which is followed by the configuration name. E.g. specifying "conf:a.b.c" will add "a.b.c" to the key, and so that configurations with different default catalog wouldn't share the same client pool. Multiple conf elements can be specified.
hadoop-conf-dir	(none)	String	File directory of the core-site.xml, hdfs-site.xml, yarn-site.xml, mapred-site.xml. Currently, only local file system paths are supported. If not configured, try to load from 'HADOOP_CONF_DIR' or 'HADOOP_HOME' system environment. Configure Priority: 1.from 'hadoop-conf-dir' 2.from HADOOP_CONF_DIR 3.from HADOOP_HOME/conf 4.HADOOP_HOME/etc/hadoop.
hive-conf-dir	(none)	String	File directory of the hive-site.xml , used to create HiveMetastoreClient and security authentication, such as Kerberos, LDAP, Ranger and so on. If not configured, try to load from 'HIVE_CONF_DIR' env.
location-in-properties	false	Boolean	Setting the location in properties of hive table/database. If you don't want to access the location by the filesystem of hive when using an object storage such as s3 or oss, you can set this option to true.
metastore.client.class	"org.apache.hadoop.hive.metastore.HiveMetaStoreClient"	String	Class name of Hive metastore client. NOTE: This class must directly implement org.apache.hadoop.hive.metastore.IMetaStoreClient.

HiveConnectorOptions #

Hive connector options for paimon.

Key	Default	Type	Description
paimon.respect.minmaxsplitsize.enabled	false	Boolean	If true, Paimon will calculate the size of split through hive parameters about splits such as 'mapreduce.input.fileinputformat.split.minsize' and 'mapreduce.input.fileinputformat.split.maxsize', and then split.
paimon.split.openfilecost	(none)	Long	The cost when opening a file. The config will overwrite the table property 'source.split.open-file-cost'.

JdbcCatalogOptions #

Options for Jdbc catalog.

Key	Default	Type	Description
catalog-key	"jdbc"	String	Custom jdbc catalog store key.
lock-key-max-length	255	Integer	Set the maximum length of the lock key. The 'lock-key' is composed of concatenating three fields : 'catalog-key', 'database', and 'table'.

FlinkCatalogOptions #

Flink catalog options for paimon.

Key	Default	Type	Description
default-database	"default"	String	The default database name.
disable-create-table-in-default-db	false	Boolean	If true, creating table in default database is not allowed. Default is false.

FlinkConnectorOptions #

Flink connector options for paimon.

Key	Default	Type	Description
changelog.precommit-compact.thread-num	(none)	Integer	Maximum number of threads to copy bytes from small changelog files. By default is the number of processors available to the Java virtual machine.
commit.custom-listeners	(none)	String	Commit listener will be called after a successful commit. This option list custom commit listener identifiers separated by comma.
end-input.watermark	(none)	Long	Optional endInput watermark used in case of batch mode or bounded stream.
filesystem.job-level-settings.enabled	true	Boolean	Enable pass job level filesystem settings to table file IO.
lookup.async	false	Boolean	Whether to enable async lookup join.
lookup.async-thread-number	16	Integer	The thread number for lookup async.
lookup.bootstrap-parallelism	4	Integer	The parallelism for bootstrap in a single task for lookup join.
lookup.cache	AUTO	Enum	The cache mode of lookup join. Possible values: "AUTO" "FULL" "MEMORY"
lookup.dynamic-partition.refresh-interval	1 h	Duration	Specific dynamic partition refresh interval for lookup, scan all partitions and obtain corresponding partition.
lookup.refresh.async	false	Boolean	Whether to refresh lookup table in an async thread.
lookup.refresh.async.pending-snapshot-count	5	Integer	If the pending snapshot count exceeds the threshold, lookup operator will refresh the table in sync.
lookup.refresh.full-load-threshold	(none)	Integer	If the pending snapshot count exceeds this threshold, lookup table will discard incremental updates and refresh the entire table from the latest snapshot. This can improve performance when there are many snapshots pending. Set to a reasonable value (e.g., 10) to enable this optimization. Default is Integer.MAX_VALUE (disabled).
lookup.refresh.time-periods-blacklist	(none)	String	The blacklist contains several time periods. During these time periods, the lookup table's cache refreshing is forbidden. Blacklist format is start1->end1,start2->end2,... , and the time format is yyyy-MM-dd HH:mm. Only used when lookup table is FULL cache mode.
partition.idle-time-to-done	(none)	Duration	Set a time duration when a partition has no new data after this time duration, mark the done status to indicate that the data is ready.
partition.mark-done-action.mode	process-time	Enum	How to trigger partition mark done action. Possible values: "process-time": Based on the time of the machine, mark the partition done once the processing time passes period time plus delay. "watermark": Based on the watermark of the input, mark the partition done once the watermark passes period time plus delay.
partition.mark-done.recover-from-state	true	Boolean	Whether to trigger partition mark done when recovering from state.
partition.time-interval	(none)	Duration	You can specify time interval for partition, for example, daily partition is '1 d', hourly partition is '1 h'.
precommit-compact	false	Boolean	If true, it will add a compact coordinator and worker operator after the writer operator, in order to compact several changelog files (for primary key tables) or newly created data files (for unaware bucket tables) from the same partition into large ones, which can decrease the number of small files.
read.shuffle-bucket-with-partition	true	Boolean	Whether to shuffle by partition and bucket when reading.
scan.bounded	(none)	Boolean	Bounded mode for Paimon consumer. By default, Paimon automatically selects bounded mode based on the mode of the Flink job.
scan.dedicated-split-generation	false	Boolean	If true, the split generation process would be performed during runtime on a Flink task, instead of on the JobManager during initialization phase.
scan.infer-parallelism	true	Boolean	If it is false, parallelism of source is set by global parallelism. Otherwise, source parallelism is inferred from splits number (batch mode) or bucket number (streaming mode).
scan.infer-parallelism.max	1024	Integer	If scan.infer-parallelism is true, limit the parallelism of source through this option.
scan.max-snapshot.count	-1	Integer	The max snapshot count to scan per checkpoint. Not limited when it's negative.
scan.parallelism	(none)	Integer	Define a custom parallelism for the scan source. By default, if this option is not defined, the planner will derive the parallelism for each statement individually by also considering the global configuration. If user enable the scan.infer-parallelism, the planner will derive the parallelism by inferred parallelism.
scan.partitions	(none)	String	Specify the partitions to scan. Partitions should be given in the form of key1=value1,key2=value2. Partition keys not specified will be filled with the value of partition.default-name. Multiple partitions should be separated by semicolon (;). This option can support normal source tables and lookup join tables. Two special values, max_pt() and max_two_pt(), are also supported to specify the (two) partition(s) with the largest partition value. For lookup source, the max partition(s) will be periodically refreshed; for normal source, the max partition(s) will be determined before job running without refreshing even for streaming jobs.
scan.remove-normalize	false	Boolean	Whether to force the removal of the normalize node when streaming read. Note: This is dangerous and is likely to cause data errors if downstream is used to calculate aggregation and the input is not complete changelog.
scan.split-enumerator.batch-size	10	Integer	How many splits should be assigned to a subtask per batch in StaticFileStoreSplitEnumerator to avoid exceeding the `akka.framesize` limit.
scan.split-enumerator.mode	fair	Enum	The mode used by StaticFileStoreSplitEnumerator to assign splits. Possible values: "fair": Distribute splits evenly when batch reading to prevent a few tasks from reading all. "preemptive": Distribute splits preemptively according to the consumption speed of the task.
scan.watermark.alignment.group	(none)	String	A group of sources to align watermarks.
scan.watermark.alignment.max-drift	(none)	Duration	Maximal drift to align watermarks, before we pause consuming from the source/task/partition.
scan.watermark.alignment.update-interval	1 s	Duration	How often tasks should notify coordinator about the current watermark and how often the coordinator should announce the maximal aligned watermark.
scan.watermark.emit.strategy	on-event	Enum	Emit strategy for watermark generation. Possible values: "on-periodic": Emit watermark periodically, interval is controlled by Flink 'pipeline.auto-watermark-interval'. "on-event": Emit watermark per record.
scan.watermark.idle-timeout	(none)	Duration	If no records flow in a partition of a stream for that amount of time, then that partition is considered "idle" and will not hold back the progress of watermarks in downstream operators.
sink.clustering.sample-factor	100	Integer	Specifies the sample factor. Let S represent the total number of samples, F represent the sample factor, and P represent the sink parallelism, then S=F×P. The minimum allowed sample factor is 20.
sink.clustering.sort-in-cluster	true	Boolean	Indicates whether to further sort data belonged to each sink task after range partitioning.
sink.committer-cpu	1.0	Double	Sink committer cpu to control cpu cores of global committer.
sink.committer-memory	(none)	MemorySize	Sink committer memory to control heap memory of global committer.
sink.committer-operator-chaining	true	Boolean	Allow sink committer and writer operator to be chained together
sink.cross-partition.managed-memory	256 mb	MemorySize	Weight of managed memory for RocksDB in cross-partition update, Flink will compute the memory size according to the weight, the actual memory used depends on the running environment.
sink.managed.writer-buffer-memory	256 mb	MemorySize	Weight of writer buffer in managed memory, Flink will compute the memory size for writer according to the weight, the actual memory used depends on the running environment.
sink.operator-uid.suffix	(none)	String	Set the uid suffix for the writer, dynamic bucket assigner and committer operators. The uid format is ${UID_PREFIX}_${TABLE_NAME}_${USER_UID_SUFFIX}. If the uid suffix is not set, flink will automatically generate the operator uid, which may be incompatible when the topology changes.
sink.parallelism	(none)	Integer	Defines a custom parallelism for the sink. By default, if this option is not defined, the planner will derive the parallelism for each statement individually by also considering the global configuration.
sink.savepoint.auto-tag	false	Boolean	If true, a tag will be automatically created for the snapshot created by flink savepoint.
sink.use-managed-memory-allocator	false	Boolean	If true, flink sink will use managed memory for merge tree; otherwise, it will create an independent memory allocator.
sink.writer-coordinator.cache-memory	2 gb	MemorySize	Controls the cache memory of writer coordinator to cache manifest files in Job Manager.
sink.writer-coordinator.enabled	false	Boolean	Enable sink writer coordinator to plan data files in Job Manager.
sink.writer-coordinator.page-size	32 kb	MemorySize	Controls the page size for one RPC request of writer coordinator.
sink.writer-cpu	1.0	Double	Sink writer cpu to control cpu cores of writer.
sink.writer-memory	(none)	MemorySize	Sink writer memory to control heap memory of writer.
sink.writer-refresh-detectors	(none)	String	The option groups which are expected to be refreshed during streaming writes; multiple option groups are separated by commas. Now only 'external-paths' is supported.
source.checkpoint-align.enabled	false	Boolean	Whether to align the flink checkpoint with the snapshot of the paimon table. If true, a checkpoint will only be made if a snapshot is consumed.
source.checkpoint-align.timeout	30 s	Duration	If the new snapshot has not been generated when the checkpoint starts to trigger, the enumerator will block the checkpoint and wait for the new snapshot. This option sets the maximum waiting time to avoid waiting indefinitely; if the wait times out, the checkpoint will fail. Note that it should be set smaller than the checkpoint timeout.
source.operator-uid.suffix	(none)	String	Set the uid suffix for the source operators. After setting, the uid format is ${UID_PREFIX}_${TABLE_NAME}_${USER_UID_SUFFIX}. If the uid suffix is not set, flink will automatically generate the operator uid, which may be incompatible when the topology changes.
unaware-bucket.compaction.parallelism	(none)	Integer	Defines a custom parallelism for the unaware-bucket table compaction job. By default, if this option is not defined, the planner will derive the parallelism for each statement individually by also considering the global configuration.

SparkCatalogOptions #

Spark catalog options for paimon.

Key	Default	Type	Description
catalog.create-underlying-session-catalog	false	Boolean	If true, create and use an underlying session catalog instead of the default session catalog when using SparkGenericCatalog.
defaultDatabase	"default"	String	The default database name.
v1Function.enabled	true	Boolean	Whether to enable v1 function.

SparkConnectorOptions #

Spark connector options for paimon.

Key	Default	Type	Description
read.allow.fullScan	true	Boolean	Whether to allow full scan when reading a partitioned table.
read.changelog	false	Boolean	Whether to read rows in the form of a changelog (a rowkind column is added to each row to represent its change type).
read.stream.maxBytesPerTrigger	(none)	Long	The maximum number of bytes returned in a single batch.
read.stream.maxFilesPerTrigger	(none)	Integer	The maximum number of files returned in a single batch.
read.stream.maxRowsPerTrigger	(none)	Long	The maximum number of rows returned in a single batch.
read.stream.maxTriggerDelayMs	(none)	Long	The maximum delay between two adjacent batches, which is used together with read.stream.minRowsPerTrigger to create MinRowsReadLimit.
read.stream.minRowsPerTrigger	(none)	Long	The minimum number of rows returned in a single batch, which is used together with read.stream.maxTriggerDelayMs to create MinRowsReadLimit.
requiredSparkConfsCheck.enabled	true	Boolean	Whether to verify SparkSession is initialized with required configurations.
source.split.target-size-with-column-pruning	false	Boolean	Whether to adjust the target split size based on pruned (projected) columns. If enabled, split size estimation uses only the columns actually being read.
write.merge-schema	false	Boolean	If true, merge the data schema and the table schema automatically before writing data.
write.merge-schema.explicit-cast	false	Boolean	If true, allow merging data types if the two types meet the rules for explicit casting.
write.use-v2-write	false	Boolean	If true, v2 write will be used. Currently, only HASH_FIXED and BUCKET_UNAWARE bucket modes are supported. Will fall back to v1 write for other bucket modes. Currently, Spark V2 write does not support TableCapability.STREAMING_WRITE.

ORC Options #

Key	Default	Type	Description
orc.column.encoding.direct	(none)	Integer	Comma-separated list of fields for which dictionary encoding is to be skipped in orc.
orc.dictionary.key.threshold	0.8	Double	If the number of distinct keys in a dictionary is greater than this fraction of the total number of non-null rows, turn off dictionary encoding in orc. Use 0 to always disable dictionary encoding. Use 1 to always use dictionary encoding.
orc.timestamp-ltz.legacy.type	true	Boolean	This option is used to be compatible with the paimon-orc's old behavior for the `timestamp_ltz` data type.

RocksDB Options #

The following options allow users to finely adjust RocksDB for better performance. You can either specify them in table properties or in dynamic table hints.

Key	Default	Type	Description
lookup.cache-rows	10000	Long	The maximum number of rows to store in the cache.
lookup.continuous.discovery-interval	(none)	Duration	The discovery interval of lookup continuous reading. This is used as an SQL hint. If it's not configured, the lookup function will fall back to 'continuous.discovery-interval'.
rocksdb.block.blocksize	4 kb	MemorySize	The approximate size (in bytes) of user data packed per block. The default blocksize is '4KB'.
rocksdb.block.cache-size	128 mb	MemorySize	The amount of the cache for data blocks in RocksDB.
rocksdb.block.metadata-blocksize	4 kb	MemorySize	Approximate size of partitioned metadata packed per block. Currently applied to indexes block when partitioned index/filters option is enabled. The default blocksize is '4KB'.
rocksdb.bloom-filter.bits-per-key	10.0	Double	Bits per key that the bloom filter will use. This only takes effect when the bloom filter is used. The default value is 10.0.
rocksdb.bloom-filter.block-based-mode	false	Boolean	If true, RocksDB will use a block-based filter instead of a full filter. This only takes effect when the bloom filter is used. The default value is 'false'.
rocksdb.compaction.level.max-size-level-base	256 mb	MemorySize	The upper-bound of the total size of level base files in bytes. The default value is '256MB'.
rocksdb.compaction.level.target-file-size-base	64 mb	MemorySize	The target file size for compaction, which determines a level-1 file size. The default value is '64MB'.
rocksdb.compaction.level.use-dynamic-size	false	Boolean	If true, RocksDB will pick target size of each level dynamically. From an empty DB, RocksDB would make last level the base level, which means merging L0 data into the last level, until it exceeds max_bytes_for_level_base. And then repeat this process for second last level and so on. The default value is 'false'. For more information, please refer to RocksDB's doc.
rocksdb.compaction.style	LEVEL	Enum	The specified compaction style for DB. Candidate compaction style is LEVEL, FIFO, UNIVERSAL or NONE, and Flink chooses 'LEVEL' as default style. Possible values: "LEVEL" "UNIVERSAL" "FIFO" "NONE"
rocksdb.compression.type	LZ4_COMPRESSION	Enum	The compression type. Possible values: "NO_COMPRESSION" "SNAPPY_COMPRESSION" "ZLIB_COMPRESSION" "BZLIB2_COMPRESSION" "LZ4_COMPRESSION" "LZ4HC_COMPRESSION" "XPRESS_COMPRESSION" "ZSTD_COMPRESSION" "DISABLE_COMPRESSION_OPTION"
rocksdb.files.open	-1	Integer	The maximum number of open files (per stateful operator) that can be used by the DB, '-1' means no limit. The default value is '-1'.
rocksdb.thread.num	2	Integer	The maximum number of concurrent background flush and compaction jobs (per stateful operator). The default value is '2'.
rocksdb.use-bloom-filter	false	Boolean	If true, every newly created SST file will contain a Bloom filter. It is disabled by default.
rocksdb.writebuffer.count	2	Integer	The maximum number of write buffers that are built up in memory. The default value is '2'.
rocksdb.writebuffer.number-to-merge	1	Integer	The minimum number of write buffers that will be merged together before writing to storage. The default value is '1'.
rocksdb.writebuffer.size	64 mb	MemorySize	The amount of data built up in memory (backed by an unsorted log on disk) before converting to a sorted on-disk file. The default writebuffer size is '64MB'.

Configuration #

CoreOptions #

aggregation.remove-record-on-delete

add-column-before-partition

alter-column-null-to-not-null.disabled

async-file-write

auto-create

blob-as-descriptor

blob-descriptor-field

blob-external-storage-field

blob-external-storage-path

blob-field

blob.split-by-file-size

blob.target-file-size

bucket

bucket-append-ordered

bucket-function.type

bucket-key

cache-page-size

chain-table.enabled

changelog-file.compression

changelog-file.format

changelog-file.prefix

changelog-file.stats-mode

changelog-producer

changelog-producer.row-deduplicate

changelog-producer.row-deduplicate-ignore-fields

changelog.num-retained.max

changelog.num-retained.min

changelog.time-retained

clustering.columns

clustering.history-partition.idle-to-full-sort

clustering.history-partition.limit

clustering.incremental

clustering.incremental.optimize-write

clustering.strategy

commit.callback.#.param

commit.callbacks

commit.discard-duplicate-files

commit.force-compact

commit.force-create-snapshot

commit.max-retries

commit.max-retry-wait

commit.min-retry-wait

commit.strict-mode.last-safe-snapshot

commit.timeout

commit.user-prefix

compaction.delete-ratio-threshold

compaction.force-rewrite-all-files

compaction.force-up-level-0

compaction.incremental-size-threshold

compaction.max-size-amplification-percent

compaction.min.file-num

compaction.offpeak-ratio

compaction.offpeak.end.hour

compaction.offpeak.start.hour

compaction.optimization-interval

compaction.size-ratio

compaction.total-size-threshold

consumer-id

consumer.expiration-time

consumer.ignore-progress

consumer.mode

continuous.discovery-interval

cross-partition-upsert.bootstrap-parallelism

cross-partition-upsert.index-ttl

data-evolution.enabled

data-file.external-paths

data-file.external-paths.specific-fs

data-file.external-paths.strategy

data-file.external-paths.weights

data-file.path-directory

data-file.prefix

data-file.thin-mode

delete.force-produce-changelog

deletion-vector.index-file.target-size

deletion-vectors.bitmap64

deletion-vectors.enabled

deletion-vectors.modifiable

disable-explicit-type-casting