概述
SourceFunction:非并行数据源(并行度只能=1) --接口
RichSourceFunction:多功能非并行数据源(并行度只能=1) --类
ParallelSourceFunction:并行数据源(并行度能够>=1) --接口
RichParallelSourceFunction:多功能并行数据源(并行度能够>=1) --类 【建议使用的】
——Rich 字样代表富有,在编程中,富有代表可以调用的方法很多,功能很全的意思。
基础案例
package com.bigdata.day02;
//1、SourceFunction
// public class ZidingyiSource implements SourceFunction<Student> {
//2、RichSourceFunction
// public class ZidingyiSource extends RichSourceFunction<Student> {
//3、ParallelSourceFunction
//public class ZidingyiSource implements ParallelSourceFunction<Student> {
//4、RichParallelSourceFunction
//public class ZidingyiSource extends RichParallelSourceFunction<Student> {
// 推荐的
/**
 * Custom parallel source that emits one randomly generated {@link Student} per second
 * until it is cancelled.
 *
 * <p>Extends {@code RichParallelSourceFunction}, so in addition to {@code run}/{@code cancel}
 * it also overrides the {@code open}/{@code close} lifecycle hooks.
 */
public class ZidingyiSource extends RichParallelSourceFunction<Student> {

    private final Random random = new Random();

    /**
     * Loop guard for {@link #run}. Declared {@code volatile} because, per Flink's
     * SourceFunction contract, {@link #cancel()} is invoked from a different thread than the
     * one executing {@link #run}; without it the write in {@code cancel()} is not guaranteed
     * to become visible to the emitting loop.
     */
    private volatile boolean running = true;

    /**
     * Lifecycle hook; currently only prints a message (placeholder for acquiring resources).
     *
     * @param parameters configuration passed in by the runtime (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        System.out.println("实现一些资源的开启");
    }

    /** Lifecycle hook; currently only prints a message (placeholder for releasing resources). */
    @Override
    public void close() throws Exception {
        System.out.println("实现一些资源的关闭");
    }

    /**
     * Emits a new {@link Student} roughly once per second while the source is running.
     *
     * @param sourceContext context used to emit the generated records
     * @throws Exception if the sleeping thread is interrupted
     */
    @Override
    public void run(SourceContext<Student> sourceContext) throws Exception {
        while (running) {
            String studentId = UUID.randomUUID().toString();
            String studentName = "Student_" + studentId;
            // nextInt(8) yields 0..7, so the age falls in 10..17.
            int studentAge = random.nextInt(8) + 10;
            long createdAt = System.currentTimeMillis();
            sourceContext.collect(new Student(studentId, studentName, studentAge, createdAt));
            Thread.sleep(1000);
        }
    }

    /**
     * Requests the emitting loop in {@link #run} to stop by clearing the running flag.
     * The original notes observe this being called when the job is cancelled
     * (e.g. via {@code JobClient#cancel()}).
     */
    @Override
    public void cancel() {
        running = false;
        System.out.println("停止运行");
    }
}
//调用
/** Driver that runs {@code ZidingyiSource} and prints every emitted record. */
public class ZiDingYi {

    /**
     * Builds a job with the custom source, prints the source's parallelism to stdout,
     * attaches a print sink with parallelism 1 and executes the job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Register the custom source: addSource + a new source instance.
        final DataStreamSource<Student> students = environment.addSource(new ZidingyiSource());

        // Report the parallelism of the source operator itself.
        System.out.println(students.getParallelism());

        // The print sink gets its own parallelism (1 here), set independently of the source.
        students.print().setParallelism(1);

        environment.execute();
    }
}
cancel+open+close的调用时机
package com.bigdata.day02;
import java.util.Objects;
/*
* 1、open、close、cancel 这几个方法都会被调用多次,调用次数等于 studentDataStreamSource 的并行度
*
*/
/**
 * Driver demonstrating when {@code open}, {@code cancel} and {@code close} of
 * {@code ZidingyiSource} are invoked: the job is submitted asynchronously, left running
 * for three seconds and then cancelled through its {@code JobClient}.
 */
public class ZiDingYi {

    /**
     * Submits the job asynchronously, waits three seconds, then cancels it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if submission fails or the sleep is interrupted
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<Student> studentDataStreamSource = env.addSource(new ZidingyiSource());

        // With source parallelism 1, the lifecycle methods are invoked only once
        // (they are invoked once per parallel instance, per the original notes).
        studentDataStreamSource.setParallelism(1);

        // The print sink keeps the environment's default parallelism
        // (the original notes observed 8 on an 8-core machine).
        studentDataStreamSource.print();

        // Submit without blocking. A blocking env.execute() must NOT precede this call:
        // the source emits until cancelled, so execute() would never return and the
        // cancel below would be unreachable.
        // Per the original notes, open() is invoked once the job starts.
        JobClient flink_job = env.executeAsync("Flink Job");

        // Let the job emit a few records before stopping it.
        Thread.sleep(3000);

        // Per the original notes, cancelling the job triggers cancel() and then close().
        flink_job.cancel();
    }
}
kafkaSource
package com.bigdata.day02;
import java.util.Properties;
/**
 * Reads string records from the Kafka topic {@code yhedu} and prints only those
 * that contain the text {@code success}.
 */
public class KafkaSource {

    /**
     * Builds and executes the Kafka-to-stdout job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka connection settings: broker address and consumer group.
        final Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "bigdata01:9092");
        kafkaProps.setProperty("group.id", "g1");

        // Consumer that deserializes each record value as a plain string.
        final FlinkKafkaConsumer<String> kafkaConsumer =
                new FlinkKafkaConsumer<>("yhedu", new SimpleStringSchema(), kafkaProps);

        final DataStreamSource<String> lines = environment.addSource(kafkaConsumer);

        // Keep only the records mentioning "success".
        final FilterFunction<String> containsSuccess = line -> line.contains("success");
        lines.filter(containsSuccess).print();

        environment.execute();
    }
}