For the deployment environment, see https://blog.csdn.net/luoye4321/article/details/99745877
The code is as follows:
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object HbaSEOpe {
  def writeStreamToHbase(txtPath: String): Unit = {
    val sparkConf = new SparkConf().setAppName("streamToHbase")
    // Poll the input directory for newly added text files every 10 seconds
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    val lines = ssc.textFileStream(txtPath)
    lines.foreachRDD(txtRdd => {
      val tablename = "tb:table3"
      // HBaseConfiguration.create() picks up hbase-site.xml from the classpath;
      // the target table must already exist in HBase
      val hbaseConf = HBaseConfiguration.create()
      hbaseConf.set(TableOutputFormat.OUTPUT_TABLE, tablename)
      val jobConf = new JobConf(hbaseConf)
      jobConf.setOutputFormat(classOf[TableOutputFormat])
      // Each input line is comma-separated: rowkey followed by seven column values
      txtRdd.map(_.split(",")).map(arr => {
        val put = new Put(Bytes.toBytes(arr(0)))
        put.addColumn(Bytes.toBytes("static"), Bytes.toBytes("col1"), Bytes.toBytes(arr(1)))
        put.addColumn(Bytes.toBytes("static"), Bytes.toBytes("col2"), Bytes.toBytes(arr(2)))
        put.addColumn(Bytes.toBytes("static"), Bytes.toBytes("col3"), Bytes.toBytes(arr(3)))
        put.addColumn(Bytes.toBytes("static"), Bytes.toBytes("col4"), Bytes.toBytes(arr(4)))
        put.addColumn(Bytes.toBytes("static"), Bytes.toBytes("col5"), Bytes.toBytes(arr(5)))
        put.addColumn(Bytes.toBytes("static"), Bytes.toBytes("col6"), Bytes.toBytes(arr(6)))
        put.addColumn(Bytes.toBytes("static"), Bytes.toBytes("col7"), Bytes.toBytes(arr(7)))
        (new ImmutableBytesWritable, put)
      }).saveAsHadoopDataset(jobConf)
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
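To try this out, the HBase namespace and table referenced above (tb:table3 with column family static) must already exist, and each input file dropped into the monitored directory should contain comma-separated lines of at least eight fields: a rowkey followed by the values for col1 through col7. Below is a minimal driver sketch for submitting the job; the object name StreamToHbaseDriver and the default input path are hypothetical and only illustrate how writeStreamToHbase might be invoked.

// Hypothetical driver object: calls the streaming job defined above.
// Assumes tb:table3 (column family "static") exists and that hbase-site.xml
// plus the HBase client jars are on the Spark classpath.
object StreamToHbaseDriver {
  def main(args: Array[String]): Unit = {
    // Directory monitored by textFileStream; new files should contain lines like
    // "rowkey,v1,v2,v3,v4,v5,v6,v7". The default path here is only an example.
    val inputDir = if (args.nonEmpty) args(0) else "hdfs:///data/stream-input"
    HbaSEOpe.writeStreamToHbase(inputDir)
  }
}

When packaged into a jar, this would typically be launched with spark-submit, passing the monitored HDFS directory as the first argument.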