public abstract class OrphanFilesClean extends Object implements Serializable
It ignores exceptions thrown while listing files, because it is acceptable to leave unread files undeleted.
To avoid deleting newly written files, it only deletes orphan files older than olderThanMillis
(1 day by default).
To avoid conflicting with snapshot expiration, tag deletion and rollback, it skips a
snapshot/tag when a FileNotFoundException is caught while listing used files.
To avoid mistakenly deleting files that are in use but could not be read, it stops the removal process when it fails to read the used files.
Modifier and Type | Class and Description |
---|---|
static interface |
OrphanFilesClean.FileCleaner
Cleaner to clean files.
|
Modifier and Type | Field and Description |
---|---|
protected boolean |
dryRun |
protected FileIO |
fileIO |
protected Path |
location |
protected static org.slf4j.Logger |
LOG |
protected long |
olderThanMillis |
protected int |
partitionKeysNum |
protected static int |
READ_FILE_RETRY_INTERVAL |
protected static int |
READ_FILE_RETRY_NUM |
protected FileStoreTable |
table |
Constructor and Description |
---|
OrphanFilesClean(FileStoreTable table,
long olderThanMillis,
boolean dryRun) |
Modifier and Type | Method and Description |
---|---|
protected void |
cleanFile(Path path) |
protected void |
cleanSnapshotDir(List<String> branches,
java.util.function.Consumer<Path> deletedFilesConsumer,
java.util.function.Consumer<Long> deletedFilesLenInBytesConsumer) |
protected void |
collectWithoutDataFile(String branch,
Snapshot snapshot,
java.util.function.Consumer<String> usedFileConsumer,
java.util.function.Consumer<String> manifestConsumer) |
protected void |
collectWithoutDataFileWithManifestFlag(String branch,
Snapshot snapshot,
java.util.function.Consumer<Pair<String,Boolean>> usedFileWithFlagConsumer) |
protected List<Path> |
listPaimonFileDirs()
Lists the directories that contain data files and manifest files.
|
protected boolean |
oldEnough(FileStatus status) |
static long |
olderThanMillis(String olderThan) |
protected static <T> T |
retryReadingFiles(SupplierWithIOException<T> reader,
T defaultValue)
Retries reading files when an
IOException is thrown by the reader. |
protected Set<Snapshot> |
safelyGetAllSnapshots(String branch) |
protected List<FileStatus> |
tryBestListingDirs(Path dir)
If listing the directory fails, just return an empty result, because it is acceptable to leave those files undeleted.
|
protected void |
tryCleanDataDirectory(Set<Path> dataDirs,
int maxLevel)
Try to clean empty data directories.
|
boolean |
tryDeleteEmptyDirectory(Path path) |
protected List<String> |
validBranches() |
protected static final org.slf4j.Logger LOG
protected static final int READ_FILE_RETRY_NUM
protected static final int READ_FILE_RETRY_INTERVAL
protected final FileStoreTable table
protected final FileIO fileIO
protected final long olderThanMillis
protected final boolean dryRun
protected final int partitionKeysNum
protected final Path location
public OrphanFilesClean(FileStoreTable table, long olderThanMillis, boolean dryRun)
protected void cleanSnapshotDir(List<String> branches, java.util.function.Consumer<Path> deletedFilesConsumer, java.util.function.Consumer<Long> deletedFilesLenInBytesConsumer)
protected void cleanFile(Path path)
protected Set<Snapshot> safelyGetAllSnapshots(String branch) throws IOException
Throws: IOException
protected void collectWithoutDataFile(String branch, Snapshot snapshot, java.util.function.Consumer<String> usedFileConsumer, java.util.function.Consumer<String> manifestConsumer) throws IOException
Throws: IOException
protected void collectWithoutDataFileWithManifestFlag(String branch, Snapshot snapshot, java.util.function.Consumer<Pair<String,Boolean>> usedFileWithFlagConsumer) throws IOException
Throws: IOException
protected List<Path> listPaimonFileDirs()
protected List<FileStatus> tryBestListingDirs(Path dir)
protected static <T> T retryReadingFiles(SupplierWithIOException<T> reader, T defaultValue) throws IOException
Retries reading files when an IOException is thrown by the reader. If the exception is a
FileNotFoundException, returns the default value. Finally, if the number of retries reaches the
limit, rethrows the IOException.
Throws: IOException
protected boolean oldEnough(FileStatus status)
protected void tryCleanDataDirectory(Set<Path> dataDirs, int maxLevel)
public boolean tryDeleteEmptyDirectory(Path path)
Copyright © 2023–2025 The Apache Software Foundation. All rights reserved.