当前位置:   article > 正文

pyflink 消费kafka_pyflink 消费kafka数据包

pyflink 消费kafka数据包

from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from  pyflink.datastream.connectors import  DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime


logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")
logger = logging.getLogger(__name__)

# 创建 StreamExecutionEnvironment 对象
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")


TEST_KAFKA_SERVERS = "127.0.0.1:9092"
TEST_KAFKA_TOPIC = "test_topic_elink"
TEST_GROUP_ID = "pyflink_elink_midsys"


def get_kafka_customer_properties(kafka_servers: str, group_id: str):
    properties = {
        "bootstrap.servers": kafka_servers,
        "fetch.max.bytes": "67108864",
        "key.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",
        "value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",
        "enable.auto.commit": "false",  # 关闭kafka 自动提交,此处不能传bool 类型会报错
        "group.id": group_id,
    }
    return properties


properties = get_kafka_customer_properties(TEST_KAFKA_SERVERS, TEST_GROUP_ID)
data_stream = env.add_source(
    FlinkKafkaConsumer(topics=TEST_KAFKA_TOPIC,
        properties=properties,
        deserialization_schema=SimpleStringSchema()) \
        .set_commit_offsets_on_checkpoints(True) \
        .set_start_from_latest()
).name(f"消费{TEST_KAFKA_TOPIC}主题数据")
data_stream.print()
env.execute()

本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号