微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

flink 读取 CSV 文件,并将 DataStream 转 Table 对象

package com.myflink

import java.lang.reflect.Field
import java.util

import org.apache.flink.api.common.typeinfo._
import org.apache.flink.api.java.io.{PojoCsvInputFormat, RowCsvInputFormat}
import org.apache.flink.api.java.typeutils.{PojoField, PojoTypeInfo}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.table.api.Tumble
import org.apache.flink.table.api.scala._
import org.apache.flink.types.Row

object Main {

  /** Reads sensor records from a CSV file as a POJO stream, assigns
    * event-time timestamps/watermarks, converts the stream to a Table,
    * and prints a 10-second tumbling-window aggregation per sensor id.
    *
    * Expected CSV row shape: `sensor_1,1547718199,35.8`
    * (id, epoch-second timestamp, temperature).
    */
  def main(args: Array[String]): Unit = {
    import org.apache.flink.streaming.api.scala._

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Build a PojoTypeInfo for SenSor by reflecting over its declared fields.
    // CAUTION: PojoTypeInfo re-sorts its fields alphabetically by name, so the
    // sorted field order must match the CSV column order (here id < timestamp
    // < uempearture happens to line up); otherwise parsing silently misreads
    // columns — this is the pitfall the surrounding article describes.
    val sample = SenSor("a", 0L, 0.2)
    val pojoFields = new util.ArrayList[PojoField]()
    val declaredFields: Array[Field] = sample.getClass.getDeclaredFields
    for (field <- declaredFields) {
      // NOTE(review): original line was mangled by the hosting site's e-mail
      // obfuscation; reconstructed with TypeInformation.of — confirm against
      // the original source if available.
      pojoFields.add(new PojoField(field, TypeInformation.of(field.getType)))
    }

    val path = new Path("D:\\allspace\\flink0906\\src\\main\\resources\\input\\test.csv")
    val ds: DataStream[SenSor] = env
      .createInput(new PojoCsvInputFormat(path, new PojoTypeInfo(classOf[SenSor], pojoFields)))
      // Event time = CSV epoch-seconds column converted to milliseconds;
      // watermarks trail the maximum seen timestamp by 10 seconds.
      .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[SenSor](Time.seconds(10)) {
        override def extractTimestamp(element: SenSor): Long = element.timestamp * 1000
      })

    val tableEnv = StreamTableEnvironment.create(env)
    val table = tableEnv.fromDataStream(ds, 'id, 'timestamp.rowtime, 'uempearture)

    // Tumbling 10 s event-time window keyed by sensor id; emit count, average
    // temperature, and the window end. toRetractStream because the grouped
    // aggregation produces updates.
    table.window(Tumble over 10.seconds on 'timestamp as 'tw)
      .groupBy('id, 'tw)
      .select('id, 'id.count, 'uempearture.avg, 'tw.end)
      .toRetractStream[Row].print()

    env.execute()
  }

}

case class SenSor(var id: String, var timestamp: Long, var uempearture: Double) {};

  这里面的坑:

SenSor 对象的属性,在构造 PojoTypeInfo 时会按照字段名字重新排序;如果排序后的字段顺序与 CSV 文件的列顺序不一致,就会直接导致 CSV 文件解析出错(列被错位读入)。

csv文件内容:
sensor_1,1547718199,35.8
sensor_6,1547718201,15.4

flink版本为 1.10.1




版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。

相关推荐