{"name":"Michael"}
{"name":"Andy", "age":30}
{"name":"Justin", "age":3}
{"name":"WangHua", "age":19}
{"name":"ZhangSan", "age":10}
{"name":"LiSi", "age":33}
{"name":"ZhaoWu", "age":37}
{"name":"MengXiao", "age":68}
{"name":"KaiDa", "age":89}
"""Sample Spark job: read JSON from COS, filter it with SQL, write CSV back."""
import sys

from pyspark.sql import SparkSession
from pyspark.sql import Row

if __name__ == "__main__":
    spark = (
        SparkSession
        .builder
        .appName("Operate data on cos")
        .getOrCreate()
    )
    try:
        # 1. Read data from COS. Multiple file types are supported,
        #    e.g. json, csv, parquet, orc, text.
        read_path = "cosn://dlc-demo-1305424723/people.json"
        peopleDF = spark.read.json(read_path)

        # 2. Operate on the data: expose it as a temp view and query it.
        peopleDF.createOrReplaceTempView("people")
        data_src = spark.sql("SELECT * FROM people WHERE age BETWEEN 13 AND 19")
        data_src.show()

        # 3. Write the result as CSV with a header row, replacing any
        #    previous output at the same path.
        write_path = "cosn://dlc-demo-1305424723/people_output"
        data_src.write.csv(path=write_path, header=True, sep=",", mode='overwrite')
    finally:
        # Stop the session even if one of the steps above raised.
        spark.stop()
"""Sample Spark job: create a database and tables, then insert and query rows."""
from os.path import abspath

from pyspark.sql import SparkSession

if __name__ == "__main__":
    spark = (
        SparkSession
        .builder
        .appName("Operate DB Example")
        .getOrCreate()
    )
    try:
        # 1. Create the database.
        spark.sql("CREATE DATABASE IF NOT EXISTS `DataLakeCatalog`.`dlc_db_test_py` COMMENT 'demo test' ")

        # 2. Create the internal table.
        spark.sql("CREATE TABLE IF NOT EXISTS `DataLakeCatalog`.`dlc_db_test_py`.`test`(`id` int,`name` string,`age` int) ")

        # 3. Write data into the internal table.
        spark.sql("INSERT INTO `DataLakeCatalog`.`dlc_db_test_py`.`test` VALUES (1,'Andy',12),(2,'Justin',3) ")

        # 4. Query the internal table.
        spark.sql("SELECT * FROM `DataLakeCatalog`.`dlc_db_test_py`.`test` ").show()

        # 5. Create the external table (JSON SerDe, text files at a COS location).
        spark.sql("CREATE EXTERNAL TABLE IF NOT EXISTS `DataLakeCatalog`.`dlc_db_test_py`.`ext_test`(`id` int, `name` string, `age` int) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' STORED AS TEXTFILE LOCATION 'cosn://cry-1305424723/ext_test' ")

        # 6. Write data into the external table.
        spark.sql("INSERT INTO `DataLakeCatalog`.`dlc_db_test_py`.`ext_test` VALUES (1,'Andy',12),(2,'Justin',3) ")

        # 7. Query the external table.
        spark.sql("SELECT * FROM `DataLakeCatalog`.`dlc_db_test_py`.`ext_test` ").show()
    finally:
        # Stop the session even if one of the statements above raised.
        spark.stop()
配置参数 | 说明 |
作业名称 | 自定义 Spark 作业名称,例如:cosn_py |
作业类型 | 选择 批处理类型 |
数据引擎 | 选择 创建资源 步骤创建的 dlc-demo 计算引擎 |
程序包 | 选择 COS,然后选择在“上传 py 文件到 COS”步骤中上传的 py 文件:从 COS 上读写数据选择 cosn://dlc-demo-1305424723/cos.py;在 DLC 上建库、建表等选择 cosn://dlc-demo-1305424723/db.py |
数据访问策略 | 选择该步骤前创建的策略 qcs::cam::uin/100018379117:roleName/dlc-demo |
本页内容是否解决了您的问题?