{"name":"Michael"}{"name":"Andy", "age":30}{"name":"Justin", "age":3}{"name":"WangHua", "age":19}{"name":"ZhangSan", "age":10}{"name":"LiSi", "age":33}{"name":"ZhaoWu", "age":37}{"name":"MengXiao", "age":68}{"name":"KaiDa", "age":89}
```xml
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.12</artifactId>
    <version>3.2.1</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.12</artifactId>
    <version>3.2.1</version>
    <scope>provided</scope>
</dependency>
```
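Both Spark artifacts use `provided` scope because the DLC engine supplies Spark at runtime, so a plain `mvn package` yields a thin `demo-1.0-SNAPSHOT.jar` that is ready to upload. If the job later pulls in third-party libraries, those do need to be bundled into the JAR; a minimal sketch using the standard maven-assembly-plugin (this build section is an assumption, not part of the original pom):

```xml
<build>
    <plugins>
        <!-- Assumed plugin configuration: bundles compile-scope dependencies
             into a single JAR; the provided-scope Spark artifacts stay excluded -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
```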
```java
package com.tencent.dlc;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class CosService {

    public static void main(String[] args) {
        // 1. Create the SparkSession
        SparkSession spark = SparkSession.builder()
                .appName("Operate data on cos")
                .config("spark.some.config.option", "some-value")
                .getOrCreate();

        // 2. Read the JSON file from COS into a dataset; other file types
        //    such as csv, parquet, orc, and text are supported as well
        String readPath = "cosn://dlc-demo-1305424723/people.json";
        Dataset<Row> readData = spark.read().json(readPath);

        // 3. Run business logic on the dataset, via either the DataFrame API
        //    or SQL. Here a temporary view is registered and queried with SQL.
        readData.createOrReplaceTempView("people");
        Dataset<Row> result = spark.sql("SELECT * FROM people WHERE age > 3");

        // 4. Save the result back to COS; json, csv, parquet, orc, and text
        //    can all be written
        String writePath = "cosn://dlc-demo-1305424723/people_output";
        result.write().mode(SaveMode.Append).json(writePath);
        spark.read().json(writePath).show();

        // 5. Close the session
        spark.stop();
    }
}
```
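The read and write calls above are not limited to JSON. A brief sketch of the same pattern with CSV input and Parquet output, reusing `spark` and `result` from the example (the file paths are hypothetical placeholders):

```java
// Read a CSV file instead of JSON (hypothetical path for illustration)
Dataset<Row> csvData = spark.read()
        .option("header", "true")       // first line holds column names
        .option("inferSchema", "true")  // derive column types from the data
        .csv("cosn://dlc-demo-1305424723/people.csv");

// Write the query result as Parquet instead of JSON
result.write()
        .mode(SaveMode.Overwrite)
        .parquet("cosn://dlc-demo-1305424723/people_parquet");
```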
```java
package com.tencent.dlc;

import org.apache.spark.sql.SparkSession;

public class DbService {

    public static void main(String[] args) {
        // 1. Initialize the SparkSession
        SparkSession spark = SparkSession.builder()
                .appName("Operate DB Example")
                .getOrCreate();

        // 2. Create a database
        String dbName = "`DataLakeCatalog`.`dlc_db_test`";
        spark.sql("CREATE DATABASE IF NOT EXISTS " + dbName + " COMMENT 'demo test'");

        // 3. Create an internal table
        String tableName = "`test`";
        spark.sql("CREATE TABLE IF NOT EXISTS " + dbName + "." + tableName
                + " (`id` int, `name` string, `age` int)");

        // 4. Write data
        spark.sql("INSERT INTO " + dbName + "." + tableName
                + " VALUES (1,'Andy',12),(2,'Justin',3)");

        // 5. Query data
        spark.sql("SELECT * FROM " + dbName + "." + tableName).show();

        // 6. Create an external table
        String extTableName = "`ext_test`";
        spark.sql("CREATE EXTERNAL TABLE IF NOT EXISTS " + dbName + "." + extTableName
                + " (`id` int, `name` string, `age` int)"
                + " ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'"
                + " STORED AS TEXTFILE LOCATION 'cosn://dlc-demo-1305424723/ext_test'");

        // 7. Write data to the external table
        spark.sql("INSERT INTO " + dbName + "." + extTableName
                + " VALUES (1,'LiLy',12),(2,'Lucy',3)");

        // 8. Query data from the external table
        spark.sql("SELECT * FROM " + dbName + "." + extTableName).show();

        // 9. Close the session
        spark.stop();
    }
}
```
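Each re-run of this sample appends new rows, since the INSERT statements target the same tables. To start from a clean state, the demo objects can be dropped first; a minimal sketch using standard Spark SQL (this cleanup is an assumption, not part of the original sample):

```java
// Assumed cleanup, not in the original sample: drop the demo tables and database.
// Dropping the external table removes only its metadata; the files on COS remain.
spark.sql("DROP TABLE IF EXISTS `DataLakeCatalog`.`dlc_db_test`.`test`");
spark.sql("DROP TABLE IF EXISTS `DataLakeCatalog`.`dlc_db_test`.`ext_test`");
spark.sql("DROP DATABASE IF EXISTS `DataLakeCatalog`.`dlc_db_test`");
```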
| Configuration Parameter | Note |
| --- | --- |
| Job Name | Custom Spark JAR job name, for example: cosn-demo. |
| Job Type | Select the batch processing type. |
| Data Engine | Select the dlc-demo computing engine created in the Create Resource step. |
| Program Package | Select COS, and choose the JAR file demo-1.0-SNAPSHOT.jar uploaded in the Upload JAR File to COS step. |
| Main Class | Fill in according to the program code. For reading and writing data on COS, enter com.tencent.dlc.CosService; for creating databases and tables on DLC, enter com.tencent.dlc.DbService. |
| CAM role arn | Select the role created in the previous step: qcs::cam::uin/100018379117:roleName/dlc-demo. |