public final class VectorizedRleValuesReader extends org.apache.parquet.column.values.ValuesReader implements VectorizedValuesReader
This encoding is used in multiple places: definition/repetition levels, dictionary ids, and Boolean type values of Parquet DataPageV2.
VectorizedValuesReader.ByteBufferOutputWriter, VectorizedValuesReader.IntegerOutputWriter
Constructor and Description |
---|
VectorizedRleValuesReader() |
VectorizedRleValuesReader(int bitWidth) |
VectorizedRleValuesReader(int bitWidth,
boolean readLength) |
Modifier and Type | Method and Description |
---|---|
void |
initFromPage(int valueCount,
org.apache.parquet.bytes.ByteBufferInputStream in) |
void |
readBatch(ParquetReadState state,
WritableColumnVector values,
WritableIntVector defLevels,
VectorizedValuesReader valueReader,
ParquetVectorUpdater updater)
Reads a batch of definition levels and values into vector 'defLevels' and 'values'
respectively.
|
void |
readBatchRepeated(ParquetReadState state,
WritableIntVector repLevels,
VectorizedRleValuesReader defLevelsReader,
WritableIntVector defLevels,
WritableColumnVector values,
VectorizedValuesReader valueReader,
ParquetVectorUpdater updater)
Reads a batch of repetition levels, definition levels and values into 'repLevels',
'defLevels' and 'values' respectively.
|
void |
readBatchRepeatedInternal(ParquetReadState state,
WritableIntVector repLevels,
VectorizedRleValuesReader defLevelsReader,
WritableIntVector defLevels,
WritableColumnVector values,
WritableColumnVector nulls,
boolean valuesReused,
VectorizedValuesReader valueReader,
ParquetVectorUpdater updater)
Keep reading repetition level values from the page until either: 1) we've read enough
top-level rows to fill the current batch, or 2) we've drained the data page completely.
|
org.apache.parquet.io.api.Binary |
readBinary(int len) |
void |
readBinary(int total,
WritableBytesVector c,
int rowId) |
boolean |
readBoolean() |
void |
readBooleans(int total,
WritableBooleanVector c,
int rowId) |
byte |
readByte() |
void |
readBytes(int total,
WritableByteVector c,
int rowId) |
void |
readDoubles(int total,
WritableDoubleVector c,
int rowId) |
void |
readFloats(int total,
WritableFloatVector c,
int rowId) |
int |
readInteger() |
void |
readIntegers(int total,
WritableIntVector c,
int rowId) |
void |
readIntegers(ParquetReadState state,
WritableColumnVector values,
WritableColumnVector nulls,
WritableIntVector defLevels,
VectorizedValuesReader valueReader)
Decoding for dictionary ids.
|
void |
readIntegersRepeated(ParquetReadState state,
WritableIntVector repLevels,
VectorizedRleValuesReader defLevelsReader,
WritableIntVector defLevels,
WritableColumnVector values,
WritableColumnVector nulls,
VectorizedValuesReader valueReader)
Reads a batch of repetition levels, definition levels and integer values into 'repLevels',
'defLevels', 'values' and 'nulls' respectively.
|
void |
readLongs(int total,
WritableLongVector c,
int rowId) |
short |
readShort() |
void |
readShorts(int total,
WritableShortVector c,
int rowId) |
int |
readValueDictionaryId() |
void |
skip() |
void |
skipBinary(int total) |
void |
skipBooleans(int total) |
void |
skipBytes(int total) |
void |
skipDoubles(int total) |
void |
skipFixedLenByteArray(int total,
int len) |
void |
skipFloats(int total) |
void |
skipIntegers(int total) |
void |
skipLongs(int total) |
void |
skipShorts(int total) |
getNextOffset, initFromPage, initFromPage, readBytes, readDouble, readFloat, readLong, skip, updateNextOffset
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
readDouble, readFloat, readLong
public VectorizedRleValuesReader()
public VectorizedRleValuesReader(int bitWidth)
public VectorizedRleValuesReader(int bitWidth, boolean readLength)
public void initFromPage(int valueCount, org.apache.parquet.bytes.ByteBufferInputStream in) throws IOException
initFromPage
in class org.apache.parquet.column.values.ValuesReader
IOException
public boolean readBoolean()
readBoolean
in interface VectorizedValuesReader
readBoolean
in class org.apache.parquet.column.values.ValuesReader
public void skip()
skip
in class org.apache.parquet.column.values.ValuesReader
public int readValueDictionaryId()
readValueDictionaryId
in class org.apache.parquet.column.values.ValuesReader
public int readInteger()
readInteger
in interface VectorizedValuesReader
readInteger
in class org.apache.parquet.column.values.ValuesReader
public void readBatch(ParquetReadState state, WritableColumnVector values, WritableIntVector defLevels, VectorizedValuesReader valueReader, ParquetVectorUpdater updater)
The related states such as row index, offset, number of values left in the batch and page, are tracked by 'state'. The type-specific 'updater' is used to update or skip values.
This reader reads the definition levels and then reads from 'valueReader' for the non-null values. If a value is null, 'values' is populated with a null value.
public void readIntegers(ParquetReadState state, WritableColumnVector values, WritableColumnVector nulls, WritableIntVector defLevels, VectorizedValuesReader valueReader)
public void readBatchRepeated(ParquetReadState state, WritableIntVector repLevels, VectorizedRleValuesReader defLevelsReader, WritableIntVector defLevels, WritableColumnVector values, VectorizedValuesReader valueReader, ParquetVectorUpdater updater)
The related states such as row index, offset, number of rows left in the batch and page, are tracked by 'state'. The type-specific 'updater' is used to update or skip values.
public void readIntegersRepeated(ParquetReadState state, WritableIntVector repLevels, VectorizedRleValuesReader defLevelsReader, WritableIntVector defLevels, WritableColumnVector values, WritableColumnVector nulls, VectorizedValuesReader valueReader)
The 'values' vector is used to hold non-null values, while the 'nulls' vector is used to hold null values.
The related states such as row index, offset, number of rows left in the batch and page, are tracked by 'state'.
Unlike 'readBatchRepeated', this is used to decode dictionary indices in dictionary encoding.
public void readBatchRepeatedInternal(ParquetReadState state, WritableIntVector repLevels, VectorizedRleValuesReader defLevelsReader, WritableIntVector defLevels, WritableColumnVector values, WritableColumnVector nulls, boolean valuesReused, VectorizedValuesReader valueReader, ParquetVectorUpdater updater)
valuesReused
- whether 'values' vector is reused for 'nulls'
public void readIntegers(int total, WritableIntVector c, int rowId)
readIntegers
in interface VectorizedValuesReader
public byte readByte()
readByte
in interface VectorizedValuesReader
public short readShort()
readShort
in interface VectorizedValuesReader
public void readBytes(int total, WritableByteVector c, int rowId)
readBytes
in interface VectorizedValuesReader
public void readShorts(int total, WritableShortVector c, int rowId)
readShorts
in interface VectorizedValuesReader
public void readLongs(int total, WritableLongVector c, int rowId)
readLongs
in interface VectorizedValuesReader
public void readBinary(int total, WritableBytesVector c, int rowId)
readBinary
in interface VectorizedValuesReader
public void readBooleans(int total, WritableBooleanVector c, int rowId)
readBooleans
in interface VectorizedValuesReader
public void readFloats(int total, WritableFloatVector c, int rowId)
readFloats
in interface VectorizedValuesReader
public void readDoubles(int total, WritableDoubleVector c, int rowId)
readDoubles
in interface VectorizedValuesReader
public org.apache.parquet.io.api.Binary readBinary(int len)
readBinary
in interface VectorizedValuesReader
public void skipIntegers(int total)
skipIntegers
in interface VectorizedValuesReader
public void skipBooleans(int total)
skipBooleans
in interface VectorizedValuesReader
public void skipBytes(int total)
skipBytes
in interface VectorizedValuesReader
public void skipShorts(int total)
skipShorts
in interface VectorizedValuesReader
public void skipLongs(int total)
skipLongs
in interface VectorizedValuesReader
public void skipFloats(int total)
skipFloats
in interface VectorizedValuesReader
public void skipDoubles(int total)
skipDoubles
in interface VectorizedValuesReader
public void skipBinary(int total)
skipBinary
in interface VectorizedValuesReader
public void skipFixedLenByteArray(int total, int len)
skipFixedLenByteArray
in interface VectorizedValuesReader
Copyright © 2023–2025 The Apache Software Foundation. All rights reserved.