ablog

不器用で落着きのない技術者のメモ

Presto でParquet にクエリするとjava.lang.UnsupportedOperationException: com.facebook.presto.spi.type.LongDecimalType

事象

  • S3のParquetファイルにHiveカタログで外部表を定義して、Presto でクエリすると、"java.lang.UnsupportedOperationException: com.facebook.presto.spi.type.LongDecimalType" という例外が発生する。
$ hive
CREATE EXTERNAL TABLE IF NOT EXISTS sh10.sales(
  prod_id DECIMAL(38,0),
  cust_id DECIMAL(38,0),
  time_id TIMESTAMP,
  channel_id DECIMAL(38,0),
  promo_id DECIMAL(38,0),
  quantity_sold DECIMAL(38,2),
  seller INT,
  fulfillment_center INT,
  courier_org INT,
  tax_country VARCHAR(3),
  tax_region VARCHAR(3),
  amount_sold DECIMAL(38,2)
)
PARTITIONED BY (year int, month int)
STORED AS PARQUET
LOCATION 's3://.../data/parquet_pyspark/sh10/sales/'
  tblproperties ("parquet.compress"="SNAPPY")
;
hive> MSCK REPAIR TABLE sh10.sales;
hive> exit;

$ presto-cli
presto> use hive.sh10;
presto:sh10> select * from sales where prod_id is not null limit 10;
Query failed (#query_id): com.facebook.presto.spi.type.LongDecimalType
  • /var/log/presto/server.log
2018-12-04T03:22:56.740Z        ERROR   remote-task-callback-73 com.facebook.presto.execution.StageStateMachine Stage 20181204_032254_00002_emvnj.1 failed
java.lang.UnsupportedOperationException: com.facebook.presto.spi.type.LongDecimalType
        at com.facebook.presto.spi.type.AbstractType.writeLong(AbstractType.java:111)
        at com.facebook.presto.hive.parquet.reader.ParquetIntColumnReader.readValue(ParquetIntColumnReader.java:32)
        at com.facebook.presto.hive.parquet.reader.ParquetPrimitiveColumnReader.lambda$readValues$1(ParquetPrimitiveColumnReader.java:184)
        at com.facebook.presto.hive.parquet.reader.ParquetPrimitiveColumnReader.processValues(ParquetPrimitiveColumnReader.java:204)
        at com.facebook.presto.hive.parquet.reader.ParquetPrimitiveColumnReader.readValues(ParquetPrimitiveColumnReader.java:183)
        at com.facebook.presto.hive.parquet.reader.ParquetPrimitiveColumnReader.readPrimitive(ParquetPrimitiveColumnReader.java:171)
        at com.facebook.presto.hive.parquet.reader.ParquetReader.readPrimitive(ParquetReader.java:209)
        at com.facebook.presto.hive.parquet.reader.ParquetReader.readColumnChunk(ParquetReader.java:259)
        at com.facebook.presto.hive.parquet.reader.ParquetReader.readBlock(ParquetReader.java:242)
        at com.facebook.presto.hive.parquet.ParquetPageSource$ParquetBlockLoader.load(ParquetPageSource.java:244)
        at com.facebook.presto.hive.parquet.ParquetPageSource$ParquetBlockLoader.load(ParquetPageSource.java:222)
        at com.facebook.presto.spi.block.LazyBlock.assureLoaded(LazyBlock.java:269)
        at com.facebook.presto.spi.block.LazyBlock.getLoadedBlock(LazyBlock.java:260)
        at com.facebook.presto.operator.project.DictionaryAwarePageProjection$DictionaryAwarePageProjectionWork.<init>(DictionaryAwarePageProjection.java:97)
        at com.facebook.presto.operator.project.DictionaryAwarePageProjection.project(DictionaryAwarePageProjection.java:75)
        at com.facebook.presto.operator.project.PageProcessor$PositionsPageProcessorIterator.processBatch(PageProcessor.java:276)
        at com.facebook.presto.operator.project.PageProcessor$PositionsPageProcessorIterator.computeNext(PageProcessor.java:182)
        at com.facebook.presto.operator.project.PageProcessor$PositionsPageProcessorIterator.computeNext(PageProcessor.java:129)
        at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:141)
        at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:136)
        at com.facebook.presto.operator.project.PageProcessorOutput.hasNext(PageProcessorOutput.java:49)
        at com.facebook.presto.operator.project.MergingPageOutput.getOutput(MergingPageOutput.java:110)
        at com.facebook.presto.operator.ScanFilterAndProjectOperator.processPageSource(ScanFilterAndProjectOperator.java:287)
        at com.facebook.presto.operator.ScanFilterAndProjectOperator.getOutput(ScanFilterAndProjectOperator.java:226)
        at com.facebook.presto.operator.Driver.processInternal(Driver.java:379)
        at com.facebook.presto.operator.Driver.lambda$processFor$8(Driver.java:283)
        at com.facebook.presto.operator.Driver.tryWithLock(Driver.java:675)
        at com.facebook.presto.operator.Driver.processFor(Driver.java:276)
        at com.facebook.presto.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:1053)
        at com.facebook.presto.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:162)
        at com.facebook.presto.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:456)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

解決策

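  • Hiveカタログで外部表を定義する際に DECIMALの精度は 17 以下にする。精度の大きい DECIMAL は Presto 内部で LongDecimalType として扱われるが、スタックトレースのとおり Parquet リーダー(ParquetIntColumnReader)が呼び出す writeLong を LongDecimalType が実装していないため例外になる。(参考のソースでは MAX_SHORT_PRECISION = 18 となっているため 18 でも動作する可能性があるが、未確認。)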
$ hive 
hive> drop table sh10.sales;
hive> CREATE EXTERNAL TABLE IF NOT EXISTS sh10.sales(
  prod_id DECIMAL(17,0),
  cust_id DECIMAL(17,0),
  time_id TIMESTAMP,
  channel_id DECIMAL(17,0),
  promo_id DECIMAL(17,0),
  quantity_sold DECIMAL(17,2),
  seller INT,
  fulfillment_center INT,
  courier_org INT,
  tax_country VARCHAR(3),
  tax_region VARCHAR(3),
  amount_sold DECIMAL(17,2)
)
PARTITIONED BY (year int, month int)
STORED AS PARQUET
LOCATION 's3://.../data/parquet_pyspark/sh10/sales/'
  tblproperties ("parquet.compress"="SNAPPY")
;
hive> MSCK REPAIR TABLE sh10.sales;

環境

  • emr-5.19.0
  • Hive 2.3.3
  • Presto 0.212

参考

thanks @nezihyigitbasi - so Decimals(>17,x) weren't supported in 0.164

https://github.com/prestodb/presto/issues/8484
    @Override
    public void writeLong(BlockBuilder blockBuilder, long value)
    {
        throw new UnsupportedOperationException(getClass().getName());
    }
package com.facebook.presto.spi.type;

import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.block.BlockBuilderStatus;
import com.facebook.presto.spi.block.FixedWidthBlockBuilder;
import com.facebook.presto.spi.block.PageBuilderStatus;
import io.airlift.slice.Slice;

import static com.facebook.presto.spi.type.Decimals.MAX_PRECISION;
public final class Decimals
{
    private Decimals() {}

    public static final int MAX_PRECISION = 38;
    public static final int MAX_SHORT_PRECISION = 18;