# Minimal PySpark example: build a small DataFrame from in-memory rows
# using an explicit schema, then print it.
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

# getOrCreate() returns the already-active session if there is one,
# otherwise it builds a new session with the given application name.
spark = SparkSession.builder.appName("WeDataApp").getOrCreate()

# Explicit schema; the third StructField argument (True) marks the
# column as nullable.
schema = StructType([
    StructField("user_id", IntegerType(), True),
    StructField("user_name", StringType(), True),
    StructField("age", IntegerType(), True),
])

# Each tuple is one row, in the same column order as the schema.
data = [(1, "Alice", 25), (2, "Bob", 30)]
df = spark.createDataFrame(data, schema=schema)

# Print the DataFrame contents as a text table on stdout.
df.show()
from pyspark.sql import SparkSession
# NOTE(review): everything below is commented out in the source. It looks
# like a Hive-backed query example (session with Hive support, SQL query,
# row count) whose line breaks were lost -- confirm whether these lines
# are meant to be live code before enabling them.
# spark = SparkSession.builder.appName("WeDataApp").enableHiveSupport().getOrCreate()
# df = spark.sql("SELECT * FROM WeData_demo_db.user_demo")
# count = df.count()
# print("The number of rows in the dataframe is:", count)
| 参数 | 说明 |
| --- | --- |
| Python 版本 | 支持 Python2、Python3。 |
# PySpark example that also exercises third-party Python packages
# (pandas, scikit-learn) to confirm they are importable in the job's
# Python environment.
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
# pandas is not referenced by name below; DataFrame.toPandas() needs it
# to be installed, so importing it here surfaces a missing package early.
import pandas as pd
import sklearn

# getOrCreate() returns the already-active session if there is one,
# otherwise it builds a new session with the given application name.
spark = SparkSession.builder.appName("WeDataApp-1").getOrCreate()

# Explicit schema; the third StructField argument (True) marks the
# column as nullable.
schema = StructType([
    StructField("user_id", IntegerType(), True),
    StructField("user_name", StringType(), True),
    StructField("age", IntegerType(), True),
])

# Each tuple is one row, in the same column order as the schema.
data = [(1, "Alice", 25), (2, "Bob", 30)]
df = spark.createDataFrame(data, schema=schema)

# toPandas() collects every row to the driver as a pandas DataFrame;
# fine for this two-row sample, but avoid it on large datasets.
pandas_df = df.toPandas()

df.show()
print(pandas_df.head(10))
# Printing the version shows that scikit-learn was imported successfully.
print(sklearn.__version__)
spark.yarn.dist.archives,file:///usr/local/python3/python3.zip#python3
本页内容是否解决了您的问题?