Class: Kafka::Client
Inherits: Object
Defined in: lib/kafka/client.rb
Instance Method Summary
- #alter_configs(broker_id, configs = []) ⇒ nil — Alter broker configs.
- #alter_topic(name, configs = {}) ⇒ nil — Alter the configuration of a topic.
- #apis ⇒ Object
- #async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, max_retries: -1, retry_backoff: 0, **options) ⇒ AsyncProducer — Creates a new AsyncProducer instance.
- #brokers ⇒ Array<Kafka::BrokerInfo> — List all brokers in the cluster.
- #close ⇒ nil — Closes all connections to the Kafka brokers and frees up used resources.
- #consumer(group_id:, session_timeout: 30, rebalance_timeout: 60, offset_commit_interval: 10, offset_commit_threshold: 0, heartbeat_interval: 10, offset_retention_time: nil, fetcher_max_queue_size: 100, refresh_topic_interval: 0, interceptors: [], assignment_strategy: nil) ⇒ Consumer — Creates a new Kafka consumer.
- #controller_broker ⇒ Kafka::BrokerInfo — The current controller broker in the cluster.
- #create_partitions_for(name, num_partitions: 1, timeout: 30) ⇒ nil — Create partitions for a topic.
- #create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30, config: {}) ⇒ nil — Creates a topic in the cluster.
- #delete_topic(name, timeout: 30) ⇒ nil — Delete a topic in the cluster.
- #deliver_message(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, retries: 1) ⇒ nil — Delivers a single message to the Kafka cluster.
- #describe_configs(broker_id, configs = []) ⇒ Array<Kafka::Protocol::DescribeConfigsResponse::ConfigEntry> — Describe broker configs.
- #describe_group(group_id) ⇒ Kafka::Protocol::DescribeGroupsResponse::Group — Describe a consumer group.
- #describe_topic(name, configs = []) ⇒ Hash<String, String> — Describe the configuration of a topic.
- #each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block) ⇒ nil — Enumerate all messages in a topic.
- #fetch_group_offsets(group_id) ⇒ Hash<String, Hash<Integer, Kafka::Protocol::OffsetFetchResponse::PartitionOffsetInfo>> — Fetch all committed offsets for a consumer group.
- #fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, retries: 1) ⇒ Array<Kafka::FetchedMessage> — Fetches a batch of messages from a single partition.
- #groups ⇒ Array<String> — Lists all consumer groups in the cluster.
- #has_topic?(topic) ⇒ Boolean
- #initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil, ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil, ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil, sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil, sasl_aws_msk_iam_access_key_id: nil, sasl_aws_msk_iam_secret_key_id: nil, sasl_aws_msk_iam_aws_region: nil, sasl_aws_msk_iam_session_token: nil, sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true, resolve_seed_brokers: false) ⇒ Client (constructor) — Initializes a new Kafka client.
- #last_offset_for(topic, partition) ⇒ Integer — Retrieve the offset of the last message in a partition.
- #last_offsets_for(*topics) ⇒ Hash<String, Hash<Integer, Integer>> — Retrieve the offset of the last message in each partition of the specified topics.
- #partitions_for(topic) ⇒ Integer — Counts the number of partitions in a topic.
- #producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000, idempotent: false, transactional: false, transactional_id: nil, transactional_timeout: 60, interceptors: []) ⇒ Kafka::Producer — Initializes a new Kafka producer.
- #replica_count_for(topic) ⇒ Integer — Counts the number of replicas for a topic's partitions.
- #supports_api?(api_key, version = nil) ⇒ Boolean — Checks whether the cluster supports a specific API version.
- #topics ⇒ Array<String> — Lists all topics in the cluster.
Constructor Details
#initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil, ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil, ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil, sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil, sasl_aws_msk_iam_access_key_id: nil, sasl_aws_msk_iam_secret_key_id: nil, sasl_aws_msk_iam_aws_region: nil, sasl_aws_msk_iam_session_token: nil, sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true, resolve_seed_brokers: false) ⇒ Client
Initializes a new Kafka client.
Parameters:
- seed_brokers (Array<String>, String) — the list of brokers used to initialize the client. Either an Array of connections, or a comma separated string of connections. A connection can either be a string of "host:port" or a full URI with a scheme. If there's a scheme it's ignored and only host/port are used.
- client_id (String) (defaults to: "ruby-kafka") — the identifier for this application.
- logger (Logger) (defaults to: nil) — the logger that should be used by the client.
- connect_timeout (Integer, nil) (defaults to: nil) — the timeout setting for connecting to brokers. See BrokerPool#initialize.
- socket_timeout (Integer, nil) (defaults to: nil) — the timeout setting for socket connections. See BrokerPool#initialize.
- ssl_ca_cert (String, Array<String>, nil) (defaults to: nil) — a PEM encoded CA cert, or an Array of PEM encoded CA certs, to use with an SSL connection.
- ssl_ca_cert_file_path (String, Array<String>, nil) (defaults to: nil) — a path on the filesystem, or an Array of paths, to PEM encoded CA cert(s) to use with an SSL connection.
- ssl_client_cert (String, nil) (defaults to: nil) — a PEM encoded client cert to use with an SSL connection. Must be used in combination with ssl_client_cert_key.
- ssl_client_cert_key (String, nil) (defaults to: nil) — a PEM encoded client cert key to use with an SSL connection. Must be used in combination with ssl_client_cert.
- ssl_client_cert_key_password (String, nil) (defaults to: nil) — the password required to read the ssl_client_cert_key. Must be used in combination with ssl_client_cert_key.
- sasl_gssapi_principal (String, nil) (defaults to: nil) — a KRB5 principal.
- sasl_gssapi_keytab (String, nil) (defaults to: nil) — a KRB5 keytab filepath.
- sasl_scram_username (String, nil) (defaults to: nil) — SCRAM username.
- sasl_scram_password (String, nil) (defaults to: nil) — SCRAM password.
- sasl_scram_mechanism (String, nil) (defaults to: nil) — the SCRAM mechanism, either "sha256" or "sha512".
- sasl_over_ssl (Boolean) (defaults to: true) — whether to enforce SSL with SASL.
- ssl_ca_certs_from_system (Boolean) (defaults to: false) — whether to use the CA certs from the system's default certificate store.
- partitioner (Partitioner, nil) (defaults to: nil) — the partitioner that should be used by the client.
- sasl_oauth_token_provider (Object, nil) (defaults to: nil) — an OAuthBearer token provider instance that implements a token method. See Sasl::OAuth#initialize.
- ssl_verify_hostname (Boolean) (defaults to: true) — whether to verify that the host serving the SSL certificate and the signing chain of the certificate have the correct domains based on the CA certificate.
- resolve_seed_brokers (Boolean) (defaults to: false) — whether to resolve each hostname of the seed brokers. If a broker is resolved to multiple IP addresses, the client tries to connect to each of the addresses until it can connect.
# File 'lib/kafka/client.rb', line 83

def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
               ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
               ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
               sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
               sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
               sasl_aws_msk_iam_access_key_id: nil, sasl_aws_msk_iam_secret_key_id: nil,
               sasl_aws_msk_iam_aws_region: nil, sasl_aws_msk_iam_session_token: nil,
               sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil,
               sasl_oauth_token_provider: nil, ssl_verify_hostname: true, resolve_seed_brokers: false)
  @logger = TaggedLogger.new(logger)
  @instrumenter = Instrumenter.new(client_id: client_id)
  @seed_brokers = normalize_seed_brokers(seed_brokers)
  @resolve_seed_brokers = resolve_seed_brokers

  ssl_context = SslContext.build(
    ca_cert_file_path: ssl_ca_cert_file_path,
    ca_cert: ssl_ca_cert,
    client_cert: ssl_client_cert,
    client_cert_key: ssl_client_cert_key,
    client_cert_key_password: ssl_client_cert_key_password,
    client_cert_chain: ssl_client_cert_chain,
    ca_certs_from_system: ssl_ca_certs_from_system,
    verify_hostname: ssl_verify_hostname
  )

  sasl_authenticator = SaslAuthenticator.new(
    sasl_gssapi_principal: sasl_gssapi_principal,
    sasl_gssapi_keytab: sasl_gssapi_keytab,
    sasl_plain_authzid: sasl_plain_authzid,
    sasl_plain_username: sasl_plain_username,
    sasl_plain_password: sasl_plain_password,
    sasl_scram_username: sasl_scram_username,
    sasl_scram_password: sasl_scram_password,
    sasl_scram_mechanism: sasl_scram_mechanism,
    sasl_aws_msk_iam_access_key_id: sasl_aws_msk_iam_access_key_id,
    sasl_aws_msk_iam_secret_key_id: sasl_aws_msk_iam_secret_key_id,
    sasl_aws_msk_iam_aws_region: sasl_aws_msk_iam_aws_region,
    sasl_aws_msk_iam_session_token: sasl_aws_msk_iam_session_token,
    sasl_oauth_token_provider: sasl_oauth_token_provider,
    logger: @logger
  )

  if sasl_authenticator.enabled? && sasl_over_ssl && ssl_context.nil?
    raise ArgumentError, "SASL authentication requires that SSL is configured"
  end

  @connection_builder = ConnectionBuilder.new(
    client_id: client_id,
    connect_timeout: connect_timeout,
    socket_timeout: socket_timeout,
    ssl_context: ssl_context,
    logger: @logger,
    instrumenter: @instrumenter,
    sasl_authenticator: sasl_authenticator
  )

  @cluster = initialize_cluster
  @partitioner = partitioner || Partitioner.new
end
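As an illustration, a minimal sketch of constructing a client (the hostnames are hypothetical):

require "kafka"

# Pass an Array of seed brokers; the client uses them to bootstrap and then
# discovers the rest of the cluster from their metadata.
kafka = Kafka.new(
  seed_brokers: ["kafka1.example.com:9092", "kafka2.example.com:9092"],
  client_id: "my-application",
)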
Instance Method Details
#alter_configs(broker_id, configs = []) ⇒ nil
Alter broker configs.
Parameters:
- broker_id (Integer) — the id of the broker.
- configs (Array) (defaults to: []) — array of config strings.
Returns:
- (nil)
# File 'lib/kafka/client.rb', line 598

def alter_configs(broker_id, configs = [])
  @cluster.alter_configs(broker_id, configs)
end
#alter_topic(name, configs = {}) ⇒ nil
This is an alpha level API and is subject to change.
Alter the configuration of a topic.
Configuration keys must match Kafka's topic-level configs.
Examples:
Altering the cleanup policy and maximum message size of a topic
kafka = Kafka.new(["kafka1:9092"])
kafka.alter_topic("my-topic", "cleanup.policy" => "delete", "max.message.bytes" => "100000")
Parameters:
- name (String) — the name of the topic.
- configs (Hash<String, String>) (defaults to: {}) — hash of desired config keys and values.
Returns:
- (nil)
# File 'lib/kafka/client.rb', line 675

def alter_topic(name, configs = {})
  @cluster.alter_topic(name, configs)
end
#apis ⇒ Object
# File 'lib/kafka/client.rb', line 792

def apis
  @cluster.apis
end
#async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, max_retries: -1, retry_backoff: 0, **options) ⇒ AsyncProducer
Creates a new AsyncProducer instance.
All parameters allowed by #producer can be passed. In addition to this, a few extra parameters can be passed when creating an async producer.
Parameters:
- max_queue_size (Integer) (defaults to: 1000) — the maximum number of messages allowed in the queue.
- delivery_threshold (Integer) (defaults to: 0) — if greater than zero, the number of buffered messages that will automatically trigger a delivery.
- delivery_interval (Integer) (defaults to: 0) — if greater than zero, the number of seconds between automatic message deliveries.
Returns:
- (AsyncProducer)
See Also:
- #producer
# File 'lib/kafka/client.rb', line 340

def async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, max_retries: -1, retry_backoff: 0, **options)
  sync_producer = producer(**options)

  AsyncProducer.new(
    sync_producer: sync_producer,
    delivery_interval: delivery_interval,
    delivery_threshold: delivery_threshold,
    max_queue_size: max_queue_size,
    max_retries: max_retries,
    retry_backoff: retry_backoff,
    instrumenter: @instrumenter,
    logger: @logger,
  )
end
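A sketch of typical usage, assuming kafka is a Client built as above and the topic already exists; produce and shutdown are part of the AsyncProducer API:

producer = kafka.async_producer(
  delivery_interval: 30,   # flush the queue every 30 seconds...
  delivery_threshold: 100, # ...or as soon as 100 messages are buffered
)

# Returns immediately; delivery happens on a background thread.
producer.produce("hello", topic: "greetings")

# Deliver any buffered messages and stop the background thread before exiting.
producer.shutdown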
#brokers ⇒ Array<Kafka::BrokerInfo>
List all brokers in the cluster.
Returns:
- (Array<Kafka::BrokerInfo>) — the list of brokers.
# File 'lib/kafka/client.rb', line 799

def brokers
  @cluster.cluster_info.brokers
end
#close ⇒ nil
Closes all connections to the Kafka brokers and frees up used resources.
Returns:
- (nil)
# File 'lib/kafka/client.rb', line 813

def close
  @cluster.disconnect
end
#consumer(group_id:, session_timeout: 30, rebalance_timeout: 60, offset_commit_interval: 10, offset_commit_threshold: 0, heartbeat_interval: 10, offset_retention_time: nil, fetcher_max_queue_size: 100, refresh_topic_interval: 0, interceptors: [], assignment_strategy: nil) ⇒ Consumer
Creates a new Kafka consumer.
Parameters:
- group_id (String) — the id of the group that the consumer should join.
- session_timeout (Integer) (defaults to: 30) — the number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
- offset_commit_interval (Integer) (defaults to: 10) — the interval between offset commits, in seconds.
- offset_commit_threshold (Integer) (defaults to: 0) — the number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing.
- heartbeat_interval (Integer) (defaults to: 10) — the interval between heartbeats; must be less than the session timeout.
- offset_retention_time (Integer) (defaults to: nil) — the time period that committed offsets will be retained, in seconds. Defaults to the broker setting.
- fetcher_max_queue_size (Integer) (defaults to: 100) — the maximum number of items in the fetch queue that are stored for further processing. Note that each item in the queue represents a response from a single broker.
- refresh_topic_interval (Integer) (defaults to: 0) — the interval of refreshing the topic list, in seconds. If it is 0, the topic list won't be refreshed (default); if it is n (n > 0), the topic list will be refreshed every n seconds.
- interceptors (Array<Object>) (defaults to: []) — a list of consumer interceptors that implement call(Kafka::FetchedBatch).
- assignment_strategy (Object) (defaults to: nil) — a partition assignment strategy that implements protocol_type(), user_data(), and assign(members:, partitions:).
Returns:
- (Consumer)
# File 'lib/kafka/client.rb', line 380

def consumer(
    group_id:,
    session_timeout: 30,
    rebalance_timeout: 60,
    offset_commit_interval: 10,
    offset_commit_threshold: 0,
    heartbeat_interval: 10,
    offset_retention_time: nil,
    fetcher_max_queue_size: 100,
    refresh_topic_interval: 0,
    interceptors: [],
    assignment_strategy: nil
)
  cluster = initialize_cluster

  instrumenter = DecoratingInstrumenter.new(@instrumenter, {
    group_id: group_id,
  })

  # The Kafka protocol expects the retention time to be in ms.
  retention_time = (offset_retention_time && offset_retention_time * 1_000) || -1

  group = ConsumerGroup.new(
    cluster: cluster,
    logger: @logger,
    group_id: group_id,
    session_timeout: session_timeout,
    rebalance_timeout: rebalance_timeout,
    retention_time: retention_time,
    instrumenter: instrumenter,
    assignment_strategy: assignment_strategy
  )

  fetcher = Fetcher.new(
    cluster: initialize_cluster,
    group: group,
    logger: @logger,
    instrumenter: instrumenter,
    max_queue_size: fetcher_max_queue_size
  )

  offset_manager = OffsetManager.new(
    cluster: cluster,
    group: group,
    fetcher: fetcher,
    logger: @logger,
    commit_interval: offset_commit_interval,
    commit_threshold: offset_commit_threshold,
    offset_retention_time: offset_retention_time
  )

  heartbeat = Heartbeat.new(
    group: group,
    interval: heartbeat_interval,
    instrumenter: instrumenter
  )

  Consumer.new(
    cluster: cluster,
    logger: @logger,
    instrumenter: instrumenter,
    group: group,
    offset_manager: offset_manager,
    fetcher: fetcher,
    session_timeout: session_timeout,
    heartbeat: heartbeat,
    refresh_topic_interval: refresh_topic_interval,
    interceptors: interceptors
  )
end
#controller_broker ⇒ Kafka::BrokerInfo
The current controller broker in the cluster.
Returns:
- (Kafka::BrokerInfo) — information on the controller broker.
# File 'lib/kafka/client.rb', line 806

def controller_broker
  brokers.find {|broker| broker.node_id == @cluster.cluster_info.controller_id }
end
#create_partitions_for(name, num_partitions: 1, timeout: 30) ⇒ nil
Create partitions for a topic.
Parameters:
- name (String) — the name of the topic.
- num_partitions (Integer) (defaults to: 1) — the number of desired partitions for the topic.
- timeout (Integer) (defaults to: 30) — a duration of time to wait for the new partitions to be added.
Returns:
- (nil)
# File 'lib/kafka/client.rb', line 703

def create_partitions_for(name, num_partitions: 1, timeout: 30)
  @cluster.create_partitions_for(name, num_partitions: num_partitions, timeout: timeout)
end
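For example, to grow a hypothetical "my-topic" to 10 partitions in total (note that Kafka only allows increasing the partition count):

kafka.create_partitions_for("my-topic", num_partitions: 10)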
#create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30, config: {}) ⇒ nil
Creates a topic in the cluster.
Examples:
Creating a topic with log compaction
# Enable log compaction:
config = { "cleanup.policy" => "compact" }
# Create the topic:
kafka.create_topic("dns-mappings", config: config)
Parameters:
- name (String) — the name of the topic.
- num_partitions (Integer) (defaults to: 1) — the number of partitions that should be created in the topic.
- replication_factor (Integer) (defaults to: 1) — the replication factor of the topic.
- timeout (Integer) (defaults to: 30) — a duration of time to wait for the topic to be completely created.
- config (Hash) (defaults to: {}) — topic configuration entries. See the Kafka documentation for more information.
Returns:
- (nil)
Raises:
- (Kafka::TopicAlreadyExists) — if the topic already exists.
# File 'lib/kafka/client.rb', line 622

def create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30, config: {})
  @cluster.create_topic(
    name,
    num_partitions: num_partitions,
    replication_factor: replication_factor,
    timeout: timeout,
    config: config,
  )
end
#delete_topic(name, timeout: 30) ⇒ nil
Delete a topic in the cluster.
Parameters:
- name (String) — the name of the topic.
- timeout (Integer) (defaults to: 30) — a duration of time to wait for the topic to be completely marked deleted.
Returns:
- (nil)
# File 'lib/kafka/client.rb', line 638

def delete_topic(name, timeout: 30)
  @cluster.delete_topic(name, timeout: timeout)
end
#deliver_message(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, retries: 1) ⇒ nil
Delivers a single message to the Kafka cluster.
Note: Only use this API for low-throughput scenarios. If you want to deliver many messages at a high rate, or if you want to configure the way messages are sent, use the #producer or #async_producer APIs instead.
Parameters:
- value (String, nil) — the message value.
- key (String, nil) (defaults to: nil) — the message key.
- headers (Hash<String, String>) (defaults to: {}) — the headers for the message.
- topic (String) — the topic that the message should be written to.
- partition (Integer, nil) (defaults to: nil) — the partition that the message should be written to, or nil if either partition_key is passed or the partition should be chosen at random.
- partition_key (String) (defaults to: nil) — a value used to deterministically choose a partition to write to.
- retries (Integer) (defaults to: 1) — the number of times to retry the delivery before giving up.
Returns:
- (nil)
# File 'lib/kafka/client.rb', line 163

def deliver_message(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, retries: 1)
  create_time = Time.now

  # We want to fail fast if `topic` isn't a String
  topic = topic.to_str

  message = PendingMessage.new(
    value: value,
    key: key,
    headers: headers,
    topic: topic,
    partition: partition,
    partition_key: partition_key,
    create_time: create_time
  )

  if partition.nil?
    partition_count = @cluster.partitions_for(topic).count
    partition = @partitioner.call(partition_count, message)
  end

  buffer = MessageBuffer.new

  buffer.write(
    value: message.value,
    key: message.key,
    headers: message.headers,
    topic: message.topic,
    partition: partition,
    create_time: message.create_time,
  )

  @cluster.add_target_topics([topic])

  compressor = Compressor.new(
    instrumenter: @instrumenter,
  )

  transaction_manager = TransactionManager.new(
    cluster: @cluster,
    logger: @logger,
    idempotent: false,
    transactional: false
  )

  operation = ProduceOperation.new(
    cluster: @cluster,
    transaction_manager: transaction_manager,
    buffer: buffer,
    required_acks: 1,
    ack_timeout: 10,
    compressor: compressor,
    logger: @logger,
    instrumenter: @instrumenter,
  )

  attempt = 1

  begin
    @cluster.refresh_metadata_if_necessary!

    operation.execute

    unless buffer.empty?
      raise DeliveryFailed.new(nil, [message])
    end
  rescue Kafka::Error => e
    @cluster.mark_as_stale!

    if attempt >= (retries + 1)
      raise
    else
      attempt += 1
      @logger.warn "Error while delivering message, #{e.class}: #{e.message}; retrying after 1s..."
      sleep 1
      retry
    end
  end
end
#describe_configs(broker_id, configs = []) ⇒ Array<Kafka::Protocol::DescribeConfigsResponse::ConfigEntry>
Describe broker configs.
Parameters:
- broker_id (Integer) — the id of the broker.
- configs (Array) (defaults to: []) — array of config keys.
Returns:
- (Array<Kafka::Protocol::DescribeConfigsResponse::ConfigEntry>)
# File 'lib/kafka/client.rb', line 589

def describe_configs(broker_id, configs = [])
  @cluster.describe_configs(broker_id, configs)
end
#describe_group(group_id) ⇒ Kafka::Protocol::DescribeGroupsResponse::Group
Describe a consumer group.
Parameters:
- group_id (String) — the id of the consumer group.
Returns:
- (Kafka::Protocol::DescribeGroupsResponse::Group)
# File 'lib/kafka/client.rb', line 683

def describe_group(group_id)
  @cluster.describe_group(group_id)
end
#describe_topic(name, configs = []) ⇒ Hash<String, String>
This is an alpha level API and is subject to change.
Describe the configuration of a topic.
Retrieves the topic configuration from the Kafka brokers. Configuration names refer to Kafka's topic-level configs.
Examples:
Describing the cleanup policy config of a topic
kafka = Kafka.new(["kafka1:9092"])
kafka.describe_topic("my-topic", ["cleanup.policy"])
#=> { "cleanup.policy" => "delete" }
Parameters:
- name (String) — the name of the topic.
- configs (Array<String>) (defaults to: []) — array of desired config names.
Returns:
- (Hash<String, String>)
# File 'lib/kafka/client.rb', line 657

def describe_topic(name, configs = [])
  @cluster.describe_topic(name, configs)
end
#each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block) ⇒ nil
Enumerate all messages in a topic.
Parameters:
- topic (String) — the topic to consume messages from.
- start_from_beginning (Boolean) (defaults to: true) — whether to start from the beginning of the topic or just subscribe to new messages being produced.
- max_wait_time (Integer) (defaults to: 5) — the maximum amount of time to wait before the server responds, in seconds.
- min_bytes (Integer) (defaults to: 1) — the minimum number of bytes to wait for. If set to zero, the broker will respond immediately, but the response may be empty. The default is 1 byte, which means that the broker will respond as soon as a message is written to the partition.
- max_bytes (Integer) (defaults to: 1048576) — the maximum number of bytes to include in the response message set. Default is 1 MB. You need to set this higher if you expect messages to be larger than this.
Returns:
- (nil)
# File 'lib/kafka/client.rb', line 558

def each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block)
  default_offset ||= start_from_beginning ? :earliest : :latest
  offsets = Hash.new { default_offset }

  loop do
    operation = FetchOperation.new(
      cluster: @cluster,
      logger: @logger,
      min_bytes: min_bytes,
      max_wait_time: max_wait_time,
    )

    @cluster.partitions_for(topic).map(&:partition_id).each do |partition|
      partition_offset = offsets[partition]
      operation.fetch_from_partition(topic, partition, offset: partition_offset, max_bytes: max_bytes)
    end

    batches = operation.execute

    batches.each do |batch|
      batch.messages.each(&block)
      offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
    end
  end
end
#fetch_group_offsets(group_id) ⇒ Hash<String, Hash<Integer, Kafka::Protocol::OffsetFetchResponse::PartitionOffsetInfo>>
Fetch all committed offsets for a consumer group.
Parameters:
- group_id (String) — the id of the consumer group.
Returns:
- (Hash<String, Hash<Integer, Kafka::Protocol::OffsetFetchResponse::PartitionOffsetInfo>>)
# File 'lib/kafka/client.rb', line 691

def fetch_group_offsets(group_id)
  @cluster.fetch_group_offsets(group_id)
end
#fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, retries: 1) ⇒ Array<Kafka::FetchedMessage>
Fetches a batch of messages from a single partition. Note that it's possible to get back empty batches.
The starting point for the fetch can be configured with the :offset argument. If you pass a number, the fetch will start at that offset. However, there are two special Symbol values that can be passed instead:
- :earliest — the first offset in the partition.
- :latest — the next offset that will be written to, effectively making the call block until there is a new message in the partition.
The Kafka protocol specifies the numeric values of these two options: -2 and -1, respectively. You can also pass in these numbers directly.
Example
When enumerating the messages in a partition, you typically fetch batches sequentially.
offset = :earliest
loop do
messages = kafka.fetch_messages(
topic: "my-topic",
partition: 42,
offset: offset,
)
messages.each do |message|
puts message.offset, message.key, message.value
# Set the next offset that should be read to be the subsequent
# offset.
offset = message.offset + 1
end
end
See a working example in examples/simple-consumer.rb.
Parameters:
- topic (String) — the topic that messages should be fetched from.
- partition (Integer) — the partition that messages should be fetched from.
- offset (Integer, Symbol) (defaults to: :latest) — the offset to start reading from. Default is the latest offset.
- max_wait_time (Integer) (defaults to: 5) — the maximum amount of time to wait before the server responds, in seconds.
- min_bytes (Integer) (defaults to: 1) — the minimum number of bytes to wait for. If set to zero, the broker will respond immediately, but the response may be empty. The default is 1 byte, which means that the broker will respond as soon as a message is written to the partition.
- max_bytes (Integer) (defaults to: 1048576) — the maximum number of bytes to include in the response message set. Default is 1 MB. You need to set this higher if you expect messages to be larger than this.
Returns:
- (Array<Kafka::FetchedMessage>) — the messages returned from the broker.
# File 'lib/kafka/client.rb', line 510

def fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, retries: 1)
  operation = FetchOperation.new(
    cluster: @cluster,
    logger: @logger,
    min_bytes: min_bytes,
    max_bytes: max_bytes,
    max_wait_time: max_wait_time,
  )

  operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)

  attempt = 1

  begin
    operation.execute.flat_map {|batch| batch.messages }
  rescue Kafka::Error => e
    @cluster.mark_as_stale!

    if attempt >= (retries + 1)
      raise
    else
      attempt += 1
      @logger.warn "Error while fetching messages, #{e.class}: #{e.message}; retrying..."
      retry
    end
  end
end
#groups ⇒ Array<String>
Lists all consumer groups in the cluster.
Returns:
- (Array<String>) — the list of group ids.
# File 'lib/kafka/client.rb', line 725

def groups
  @cluster.list_groups
end
#has_topic?(topic) ⇒ Boolean
Returns:
- (Boolean)
# File 'lib/kafka/client.rb', line 729

def has_topic?(topic)
  @cluster.clear_target_topics
  @cluster.add_target_topics([topic])
  @cluster.topics.include?(topic)
end
#last_offset_for(topic, partition) ⇒ Integer
Retrieve the offset of the last message in a partition. If there are no messages in the partition, -1 is returned.
Parameters:
- topic (String)
- partition (Integer)
Returns:
- (Integer) — the offset of the last message in the partition, or -1 if there are no messages in the partition.
# File 'lib/kafka/client.rb', line 758

def last_offset_for(topic, partition)
  # The offset resolution API will return the offset of the "next" message to
  # be written when resolving the "latest" offset, so we subtract one.
  @cluster.resolve_offset(topic, partition, :latest) - 1
end
#last_offsets_for(*topics) ⇒ Hash<String, Hash<Integer, Integer>>
Retrieve the offset of the last message in each partition of the specified topics.
Examples:
last_offsets_for('topic-1', 'topic-2')
# => {
#      'topic-1' => { 0 => 100, 1 => 100 },
#      'topic-2' => { 0 => 100, 1 => 100 }
#    }
Parameters:
- topics (Array<String>) — topic names.
Returns:
- (Hash<String, Hash<Integer, Integer>>)
# File 'lib/kafka/client.rb', line 774

def last_offsets_for(*topics)
  @cluster.add_target_topics(topics)
  topics.map {|topic|
    partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
    partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
    [topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
  }.to_h
end
#partitions_for(topic) ⇒ Integer
Counts the number of partitions in a topic.
Parameters:
- topic (String)
Returns:
- (Integer) — the number of partitions in the topic.
# File 'lib/kafka/client.rb', line 739

def partitions_for(topic)
  @cluster.partitions_for(topic).count
end
#producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000, idempotent: false, transactional: false, transactional_id: nil, transactional_timeout: 60, interceptors: []) ⇒ Kafka::Producer
Initializes a new Kafka producer.
Parameters:
- ack_timeout (Integer) (defaults to: 5) — the number of seconds a broker can wait for replicas to acknowledge a write before responding with a timeout.
- required_acks (Integer, Symbol) (defaults to: :all) — the number of replicas that must acknowledge a write, or :all if all in-sync replicas must acknowledge.
- max_retries (Integer) (defaults to: 2) — the number of retries that should be attempted before giving up sending messages to the cluster. Does not include the original attempt.
- retry_backoff (Integer) (defaults to: 1) — the number of seconds to wait between retries.
- max_buffer_size (Integer) (defaults to: 1000) — the number of messages allowed in the buffer before new writes will raise BufferOverflow exceptions.
- max_buffer_bytesize (Integer) (defaults to: 10_000_000) — the maximum size of the buffer in bytes. Attempting to produce messages when the buffer reaches this size will result in BufferOverflow being raised.
- compression_codec (Symbol, nil) (defaults to: nil) — the name of the compression codec to use, or nil if no compression should be performed. Valid codecs: :snappy, :gzip, :lz4, :zstd.
- compression_threshold (Integer) (defaults to: 1) — the number of messages that needs to be in a message set before it should be compressed. Note that message sets are per-partition rather than per-topic or per-producer.
- interceptors (Array<Object>) (defaults to: []) — a list of producer interceptors that implement call(Kafka::PendingMessage).
Returns:
- (Kafka::Producer) — the Kafka producer.
# File 'lib/kafka/client.rb', line 278

def producer(
    compression_codec: nil,
    compression_threshold: 1,
    ack_timeout: 5,
    required_acks: :all,
    max_retries: 2,
    retry_backoff: 1,
    max_buffer_size: 1000,
    max_buffer_bytesize: 10_000_000,
    idempotent: false,
    transactional: false,
    transactional_id: nil,
    transactional_timeout: 60,
    interceptors: []
)
  cluster = initialize_cluster
  compressor = Compressor.new(
    codec_name: compression_codec,
    threshold: compression_threshold,
    instrumenter: @instrumenter,
  )

  transaction_manager = TransactionManager.new(
    cluster: cluster,
    logger: @logger,
    idempotent: idempotent,
    transactional: transactional,
    transactional_id: transactional_id,
    transactional_timeout: transactional_timeout,
  )

  Producer.new(
    cluster: cluster,
    transaction_manager: transaction_manager,
    logger: @logger,
    instrumenter: @instrumenter,
    compressor: compressor,
    ack_timeout: ack_timeout,
    required_acks: required_acks,
    max_retries: max_retries,
    retry_backoff: retry_backoff,
    max_buffer_size: max_buffer_size,
    max_buffer_bytesize: max_buffer_bytesize,
    partitioner: @partitioner,
    interceptors: interceptors
  )
end
#replica_count_for(topic) ⇒ Integer
Counts the number of replicas for a topic's partitions.
Parameters:
- topic (String)
Returns:
- (Integer) — the number of replica nodes for the topic's partitions.
# File 'lib/kafka/client.rb', line 747

def replica_count_for(topic)
  @cluster.partitions_for(topic).first.replicas.count
end
#supports_api?(api_key, version = nil) ⇒ Boolean
Check whether the cluster supports a specific API version.
Parameters:
- api_key (Integer) — API key.
- version (Integer) (defaults to: nil) — API version.
Returns:
- (Boolean)
# File 'lib/kafka/client.rb', line 788

def supports_api?(api_key, version = nil)
  @cluster.supports_api?(api_key, version)
end
#topics ⇒ Array<String>
Lists all topics in the cluster.
Returns:
- (Array<String>) — the list of topic names.
# File 'lib/kafka/client.rb', line 710

def topics
  attempts = 0
  begin
    attempts += 1
    @cluster.list_topics
  rescue Kafka::ConnectionError
    @cluster.mark_as_stale!
    retry unless attempts > 1
    raise
  end
end