赞
踩
Sender.completeBatch
if (error != Errors.NONE && canRetry(batch, error)) {
// retry
log.warn("Got error produce response with correlation id {} on topic-partition {}, retrying ({} attempts left). Error: {}",
correlationId,
batch.topicPartition,
this.retries - batch.attempts - 1,
error);
// Put the batch that failed to send back into the accumulator queue so it can be retried.
this.accumulator.reenqueue(batch, now);
this.sensors.recordRetries(batch.topicPartition.topic(), batch.recordCount);
} else {
Sender.run. this.accumulator.abortExpiredBatches public List<RecordBatch> abortExpiredBatches(int requestTimeout, long now) { List<RecordBatch> expiredBatches = new ArrayList<>(); int count = 0; // for (Map.Entry<TopicPartition, Deque<RecordBatch>> entry : this.batches.entrySet()) { //获取到每个分区的队列 -》 队列里面对应的批次 Deque<RecordBatch> dq = entry.getValue(); TopicPartition tp = entry.getKey(); // We only check if the batch should be expired if the partition does not have a batch in flight. // This is to prevent later batches from being expired while an earlier batch is still in progress. // Note that `muted` is only ever populated if `max.in.flight.request.per.connection=1` so this protection // is only active in this case. Otherwise the expiration order is not guaranteed. if (!muted.contains(tp)) { synchronized (dq) { // iterate over the batches and expire them if they have been in the accumulator for more than requestTimeOut RecordBatch lastBatch = dq.peekLast(); //迭代的看每个分区里面的每个批次 Iterator<RecordBatch> batchIterator = dq.iterator(); while (batchIterator.hasNext()) { RecordBatch batch = batchIterator.next(); boolean isFull = batch != lastBatch || batch.records.isFull(); // check if the batch is expired //TODO 判断一下是否超时 if (batch.maybeExpire(requestTimeout, retryBackoffMs, now, this.lingerMs, isFull)) { //增加到超时的数据结构里面 expiredBatches.add(batch); count++; //从数据结构里面移除 batchIterator.remove(); //释放资源 deallocate(batch); } else { // Stop at the first batch that has not expired. break; } } } } } if (!expiredBatches.isEmpty()) log.trace("Expired {} batches in accumulator", count); return expiredBatches; } batch.maybeExpire public boolean maybeExpire(int requestTimeoutMs, long retryBackoffMs, long now, long lingerMs, boolean isFull) { boolean expire = false; String errorMessage = null; /** * requestTimeoutMs:代表的是请求发送的超时的时间。默认值是30. 
* now:当前时间 * lastAppendTime:批次的创建的时间(上一次重试的时间) * now - this.lastAppendTime 大于30秒,说明批次超时了 还没发送出去。 */ if (!this.inRetry() && isFull && requestTimeoutMs < (now - this.lastAppendTime)) { expire = true; //记录异常信息 errorMessage = (now - this.lastAppendTime) + " ms has passed since last append"; /** * lingerMs: 100ms,无论如何都要把消息发送出去的时间 * * createdMs:批次创建的时间 * * 已经大于30秒了。 说明也是超时了。 * */ } else if (!this.inRetry() && requestTimeoutMs < (now - (this.createdMs + lingerMs))) { expire = true; errorMessage = (now - (this.createdMs + lingerMs)) + " ms has passed since batch creation plus linger time"; /** * 针对重试 * lastAttemptMs: 上一次重试的时间(批次创建的时间) * retryBackoffMs: 重试的时间间隔 * 说明也是超时了。 */ } else if (this.inRetry() && requestTimeoutMs < (now - (this.lastAttemptMs + retryBackoffMs))) { expire = true; errorMessage = (now - (this.lastAttemptMs + retryBackoffMs)) + " ms has passed since last attempt plus backoff time"; } if (expire) { this.records.close(); //调用done方法 //方法里面传过去了一个TimeoutException的异常。(超时了) //TODO 处理超时的批次 this.done(-1L, Record.NO_TIMESTAMP, new TimeoutException("Expiring " + recordCount + " record(s) for " + topicPartition + " due to " + errorMessage)); } return expire; }
NetWorkClient.poll.handleTimedOutRequests private void handleTimedOutRequests(List<ClientResponse> responses, long now) { //获取到请求超时的主机。 List<String> nodeIds = this.inFlightRequests.getNodesWithTimedOutRequests(now, this.requestTimeoutMs); for (String nodeId : nodeIds) { // close connection to the node //关闭请求超时的主机的连接 this.selector.close(nodeId); log.debug("Disconnecting from node {} due to request timeout.", nodeId); //我们猜应该是会去修改 连接的状态 processDisconnection(responses, nodeId, now); } // we disconnected, so we should probably refresh our metadata if (nodeIds.size() > 0) metadataUpdater.requestUpdate(); } processDisconnection private void processDisconnection(List<ClientResponse> responses, String nodeId, long now) { //修改连接状态 connectionStates.disconnected(nodeId, now); // for (ClientRequest request : this.inFlightRequests.clearAll(nodeId)) { log.trace("Cancelled request {} due to node {} being disconnected", request, nodeId); if (!metadataUpdater.maybeHandleDisconnection(request)) //对这些请求进行处理 //大家会看到一个比较有意思的事 //自己封装了一个响应。这个响应里面没有服务端响应消息(服务端没给响应) //失去连接的状态表标识为true responses.add(new ClientResponse(request, now, true, null)); } } disconnected public void disconnected(String id, long now) { NodeConnectionState nodeState = nodeState(id); //修缓存的对应主机的连接转态:DISCONNECTED //sender -> 检查网络是否可以举报发送消息的条件 -> 是否可以尝试建立网络连接。 //如果主机的状态是:DISCONNECTED,可以尝试初始化连接。 //最后调用networkclient的poll方法(Selector 去完成的最后的网络连接) nodeState.state = ConnectionState.DISCONNECTED; nodeState.lastConnectAttemptMs = now; }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。