package com.javartisan.demo

import org.apache.spark.sql.SparkSession

object SparkLocal {

  def main(args: Array[String]): Unit = {
    // Local SparkSession for testing on all available cores
    val spark = SparkSession.builder().master("local[*]").appName("spark test").getOrCreate()
    import spark.implicits._
    val sc = spark.sparkContext

    val a: Int = 1
    val b: Int = 1
    val c: String = "1"
    val d: String = "1"

    // Two single-row datasets whose join-key values differ (1 vs 2)
    val arr1 = Array[(Int, Int, String, String)]((a, b, c, d))
    val arr2 = Array[(Int, Int, String, String)]((2, b, c, d))

    val rdd1 = sc.parallelize[(Int, Int, String, String)](arr1)
    val rdd2 = sc.parallelize[(Int, Int, String, String)](arr2)

    val df1 = rdd1.toDF("a", "b", "c", "d")
    val df2 = rdd2.toDF("a1", "b1", "c1", "d1")

    df1.printSchema()
    df2.printSchema()

    // Full outer join on the key columns
    val full = df1.join(df2, $"a" === $"a1", "full")

    val newFull = full.rdd.map(row => {
      // Each row coming out of DataFrame.rdd is a GenericRowWithSchema
      println("row class " + row.getClass)
      row
    })

    println(newFull.count())
    full.show(false)
    println(df1.count())
    println(df2.count())

    spark.stop()
  }
}
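A note on what this prints, assuming a standard local Spark run: because the join keys never match (1 on the left vs. 2 on the right), the full outer join keeps both rows, each padded with nulls on the unmatched side, so newFull.count() prints 2 while df1.count() and df2.count() each print 1. The println inside the map should report that each row's class is org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema, which is the detail the GenericRowWithSchema comment in the map is pointing at: rows obtained via DataFrame.rdd carry their schema along, so fields can be accessed by name as well as by position.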