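# "Like" / "Dislike" -- apparent web-page button residue from where this script was copied; not part of the program.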
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime
# Send INFO-and-above log records to stdout as "<time>-<level>-<message>".
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s-%(levelname)s-%(message)s",
    stream=sys.stdout,
)
logger = logging.getLogger(__name__)

# Kafka connection settings used by this test job.
TEST_KAFKA_SERVERS = "127.0.0.1:9092"
TEST_KAFKA_TOPIC = "test_topic_elink"
TEST_GROUP_ID = "pyflink_elink_midsys"

# Create the StreamExecutionEnvironment, run with a parallelism of 1 and
# register the Kafka SQL connector jar from the local filesystem.
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")
def get_kafka_customer_properties(kafka_servers: str, group_id: str) -> dict:
    """Build the Kafka consumer properties for the given cluster and group.

    Args:
        kafka_servers: Value for ``bootstrap.servers`` (e.g. ``"host:9092"``).
        group_id: Value for ``group.id``.

    Returns:
        A new dict mapping Kafka consumer property names to string values.
    """
    properties = {
        "bootstrap.servers": kafka_servers,
        "fetch.max.bytes": "67108864",
        # NOTE(review): Flink's Kafka consumer is believed to override the two
        # deserializer entries with its own byte-array deserializers -- confirm.
        "key.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",
        "value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",
        # Disable Kafka auto-commit. The value must be a string here;
        # passing a bool raises an error.
        "enable.auto.commit": "false",
        "group.id": group_id,
    }
    return properties
# Kafka auto-commit is turned off in the consumer properties and the consumer
# below is told to commit offsets on checkpoints. That only takes effect when
# checkpointing is enabled, so enable it here; otherwise no offsets would ever
# be committed back to Kafka for this group.
# NOTE(review): the 60 s interval is a starting value -- tune for the workload.
# Enabling checkpointing may also change Flink's default restart behaviour.
env.enable_checkpointing(60 * 1000)

properties = get_kafka_customer_properties(TEST_KAFKA_SERVERS, TEST_GROUP_ID)

# Consume the test topic as plain strings, starting from the latest offsets
# and committing offsets when checkpoints complete.
data_stream = env.add_source(
    FlinkKafkaConsumer(
        topics=TEST_KAFKA_TOPIC,
        properties=properties,
        deserialization_schema=SimpleStringSchema(),
    )
    .set_commit_offsets_on_checkpoints(True)
    .set_start_from_latest()
).name(f"消费{TEST_KAFKA_TOPIC}主题数据")

# Attach a print sink to the stream, then submit the job.
data_stream.print()
env.execute()
# Copyright © 2003-2013 www.wpsshop.cn. All rights reserved.