# Start the spark-sql shell on YARN with the Hudi Spark bundle jar attached.
# The --conf options select the Kryo serializer, register the Hudi session
# extension, and set HoodieCatalog as the implementation of spark_catalog.
spark-sql --master yarn \
  --num-executors 2 \
  --executor-memory 1g \
  --executor-cores 2 \
  --jars /usr/local/service/hudi/hudi-bundle/hudi-spark3.3-bundle_2.12-0.13.0.jar \
  --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
  --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
  --conf 'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog'
-- Run the statements below at the spark-sql> prompt.

-- Create a COW non-partitioned table
create table hudi_cow_nonpcf_tbl (
  uuid int,
  name string,
  price double
) using hudi
tblproperties (
  primaryKey = 'uuid'
);

-- Create a COW partitioned table
create table hudi_cow_pt_tbl (
  id bigint,
  name string,
  ts bigint,
  dt string,
  hh string
) using hudi
tblproperties (
  type = 'cow',
  primaryKey = 'id',
  preCombineField = 'ts'
)
partitioned by (dt, hh);

-- Create a MOR partitioned table
create table hudi_mor_tbl (
  id int,
  name string,
  price double,
  ts bigint,
  dt string
) using hudi
tblproperties (
  type = 'mor',
  primaryKey = 'id',
  preCombineField = 'ts'
)
partitioned by (dt);
-- Run the statements below at the spark-sql> prompt.

-- insert into non-partitioned table
insert into hudi_cow_nonpcf_tbl
select 1, 'a1', 20;

-- insert dynamic partition: dt and hh are taken from the selected row
insert into hudi_cow_pt_tbl partition (dt, hh)
select
  1 as id,
  'a1' as name,
  1000 as ts,
  '2021-12-09' as dt,
  '10' as hh;

-- insert static partition: dt and hh are fixed in the partition clause,
-- so the select supplies only the non-partition columns
insert into hudi_cow_pt_tbl partition (dt = '2021-12-09', hh = '11')
select 2, 'a2', 1000;

-- insert into hudi_mor_tbl using a static partition value for dt
insert into hudi_mor_tbl partition (dt = '2021-12-09')
select 1, 'a1', 20, 1000;
hive
hive> add jar /usr/local/service/hudi/hudi-bundle/hudi-hadoop-mr-bundle-0.13.0.jar;
hive> show tables;
OK
hudi_cow_nonpcf_tbl
hudi_cow_pt_tbl
hudi_mor_tbl
hudi_mor_tbl_ro
hudi_mor_tbl_rt
Time taken: 0.023 seconds, Fetched: 5 row(s)
hive> select * from hudi_cow_nonpcf_tbl;
OK
20230905170525412 20230905170525412_0_0 1 8d32a1cc-11f9-437f-9a7b-8ba9532223d3-0_0-17-15_20230905170525412.parquet 1 a1 20.0
Time taken: 1.447 seconds, Fetched: 1 row(s)
hive> set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
hive> select * from hudi_mor_tbl_ro;
OK
20230808174602565 20230808174602565_0_1 id:1 dt=2021-12-09 af40667d-1dca-4163-89ca-2c48250985b2-0_0-34-1617_20230808174602565.parquet 1 a1 20.0 1000 2021-12-09
Time taken: 0.159 seconds, Fetched: 1 row(s)
hive> set hive.vectorized.execution.enabled=false;
hive> select name, count(*) from hudi_mor_tbl_rt group by name;
a1 1
Time taken: 17.618 seconds, Fetched: 1 row(s)
本页内容是否解决了您的问题?