Dataset的数据构建
构建Dataset数据
代码示例
`
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
/**
 * Minimal example that builds a typed Spark {@code Dataset} from an in-memory
 * list of Java beans, then prints its schema, its rows and the first element.
 */
public class WeSpark implements Serializable {

    /**
     * Employee bean used as the element type of the Dataset.
     *
     * <p>The class is public, has a public no-argument constructor and exposes a
     * getter/setter pair for every field, which is the JavaBean shape that
     * {@code Encoders.bean} works with.
     */
    public static class EmployeeSpark implements Serializable {
        private String name;
        private long salary;

        /** No-argument constructor; kept public so the bean encoder can instantiate rows. */
        public EmployeeSpark() {
        }

        /**
         * Convenience constructor used by {@link WeSpark#main(String[])} to create sample rows.
         *
         * @param name   employee name
         * @param salary employee salary
         */
        private EmployeeSpark(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        /** @return the employee name */
        public String getName() {
            return name;
        }

        /** @param name the employee name */
        public void setName(String name) {
            this.name = name;
        }

        /** @return the employee salary */
        public long getSalary() {
            return salary;
        }

        /** @param salary the employee salary */
        public void setSalary(long salary) {
            this.salary = salary;
        }
    }

    /**
     * Starts a local Spark session, creates a {@code Dataset<EmployeeSpark>} from two
     * sample beans and prints the schema, the content and the first row's fields.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .appName("Java Spark sql data sources example")
                .config("spark.some.config.option", "some-value")
                .master("local[2]")
                .getOrCreate();
        try {
            // Source data: instances of the bean class.
            List<EmployeeSpark> employees = Arrays.asList(
                    new EmployeeSpark("CFF", 300L),
                    new EmployeeSpark("ADD", 200L)
            );

            Encoder<EmployeeSpark> personEncoder = Encoders.bean(EmployeeSpark.class);
            Dataset<EmployeeSpark> items2Dataset = spark.createDataset(employees, personEncoder);
            items2Dataset.printSchema();
            items2Dataset.show();

            // Fetch the first row once and reuse it instead of calling head() per field.
            EmployeeSpark first = items2Dataset.head();
            System.out.println(first.getName());
            System.out.println(first.getSalary());
        } finally {
            // Always release the session, even if one of the Dataset actions fails.
            spark.stop();
        }
    }
}
`
报错解决
caused by: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 87:
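运行时出现动态代码生成（codegen）编译报错，可从以下几点排查：
1. 字段类型：把 age/depID 等字段的类型改为 Encoder 支持的类型（原文此处未写明具体的目标类型）。
2. 权限：Bean 类及其 getter/setter 必须声明为 public。
3. 构造函数：Bean 类必须提供 public 的无参构造函数。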
参考
https://www.jianshu.com/p/dc166fdfe840
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。