 use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::fmt;
-use std::iter::once;
 use std::path::Path;
 use std::sync::Arc;
 use std::time::Duration;
@@ -49,8 +48,9 @@ use tracing::{error, info, warn};
 use super::fetch::FetchStreamTask;
 use super::models::IngesterShard;
-use super::mrecord::MRecord;
-use super::mrecordlog_utils::{append_eof_record_if_necessary, check_enough_capacity};
+use super::mrecordlog_utils::{
+    append_doc_batch, append_eof_record_if_necessary, check_enough_capacity, AppendDocBatchError,
+};
 use super::rate_limiter::{RateLimiter, RateLimiterSettings};
 use super::replication::{
     ReplicationStreamTask, ReplicationStreamTaskHandle, ReplicationTask, ReplicationTaskHandle,
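
Note (not part of the patch): `append_doc_batch` and `AppendDocBatchError` are defined in `mrecordlog_utils.rs`, which this diff does not show. Below is a minimal sketch of what that helper might look like, pieced together from the inline code removed in the persist hunk further down and from how the call site matches on the error. The error enum shape, the `thiserror` derive, the up-front `queue_exists` check, and the mapping of `mrecordlog`'s append error to the `Io` variant are assumptions, not the actual implementation.

use std::io;
use std::iter::once;

use mrecordlog::MultiRecordLog;
use quickwit_proto::ingest::DocBatchV2;
use quickwit_proto::types::{Position, QueueId};

use super::mrecord::MRecord;

/// Assumed error type: the persist code below only distinguishes `Io(_)` from
/// `QueueNotFound(_)`, so the sketch models exactly those two cases.
#[derive(Debug, thiserror::Error)]
pub(super) enum AppendDocBatchError {
    #[error("IO error: {0}")]
    Io(#[from] io::Error),
    #[error("WAL queue `{0}` not found")]
    QueueNotFound(QueueId),
}

/// Appends a document batch to the shard's WAL queue, adding a commit record when
/// `force_commit` is set, and returns the position of the last appended record.
pub(super) async fn append_doc_batch(
    mrecordlog: &mut MultiRecordLog,
    queue_id: &QueueId,
    doc_batch: &DocBatchV2,
    force_commit: bool,
) -> Result<Position, AppendDocBatchError> {
    // Assumption: a missing queue can be detected up front; the real helper may instead
    // translate the error returned by `append_records`.
    if !mrecordlog.queue_exists(queue_id) {
        return Err(AppendDocBatchError::QueueNotFound(queue_id.clone()));
    }
    let append_result = if force_commit {
        // Same encoding as the removed inline code: the docs followed by a commit record.
        let encoded_mrecords = doc_batch
            .docs()
            .map(|doc| MRecord::Doc(doc).encode())
            .chain(once(MRecord::Commit.encode()));
        mrecordlog.append_records(queue_id, None, encoded_mrecords).await
    } else {
        let encoded_mrecords = doc_batch.docs().map(|doc| MRecord::Doc(doc).encode());
        mrecordlog.append_records(queue_id, None, encoded_mrecords).await
    };
    // Assumption: any remaining append failure is surfaced as an IO error.
    let position_opt = append_result
        .map_err(|append_error| io::Error::new(io::ErrorKind::Other, append_error.to_string()))?;
    Ok(position_opt.into())
}
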
@@ -101,6 +101,49 @@ pub(super) struct IngesterState {
     pub observation_tx: watch::Sender<IngestV2Result<ObservationMessage>>,
 }
 
+impl IngesterState {
+    async fn close_shards(&mut self, queue_ids: &[QueueId]) {
+        for queue_id in queue_ids {
+            append_eof_record_if_necessary(&mut self.mrecordlog, queue_id).await;
+
+            if let Some(shard) = self.shards.get_mut(queue_id) {
+                shard.shard_state = ShardState::Closed;
+                shard.notify_new_records();
+            }
+        }
+        // TODO: Handle replicated shards.
+    }
+
+    pub async fn decommission(&mut self) {
+        let queue_ids: Vec<QueueId> = self.shards.keys().cloned().collect();
+        self.close_shards(&queue_ids).await;
+
+        self.status = IngesterStatus::Decommissioning;
+        self.check_decommissioning_status();
+    }
+
+    /// Checks whether the ingester is fully decommissioned and updates its status accordingly.
+    fn check_decommissioning_status(&mut self) {
+        if self.status != IngesterStatus::Decommissioning {
+            return;
+        }
+        if self.shards.values().all(|shard| {
+            shard.shard_state.is_closed() && shard.truncation_position_inclusive == Position::Eof
+        }) {
+            info!("ingester fully decommissioned");
+            self.status = IngesterStatus::Decommissioned;
+
+            self.observation_tx.send_if_modified(|observation_result| {
+                if let Ok(observation) = observation_result {
+                    observation.status = IngesterStatus::Decommissioned as i32;
+                    return true;
+                }
+                false
+            });
+        }
+    }
+}
+
 impl Ingester {
     pub async fn try_new(
         self_node_id: NodeId,
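
Note: the shard-closing and decommissioning helpers that the hunks below remove from `Ingester` now live on `IngesterState` itself, so any code path that already holds the state write lock can drive the shutdown directly instead of threading `&mut IngesterState` through `self`. The `decommission` handler further down reduces to the following call pattern (taken from that hunk):

    let mut state_guard = self.state.write().await;
    state_guard.decommission().await;
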
@@ -158,27 +201,6 @@ impl Ingester {
         Ok(ingester)
     }
 
-    /// Checks whether the ingester is fully decommissioned and updates its status accordingly.
-    fn check_decommissioning_status(&self, state: &mut IngesterState) {
-        if state.status != IngesterStatus::Decommissioning {
-            return;
-        }
-        if state.shards.values().all(|shard| {
-            shard.shard_state.is_closed() && shard.truncation_position_inclusive == Position::Eof
-        }) {
-            info!("ingester fully decommissioned");
-            state.status = IngesterStatus::Decommissioned;
-
-            state.observation_tx.send_if_modified(|observation_result| {
-                if let Ok(observation) = observation_result {
-                    observation.status = IngesterStatus::Decommissioned as i32;
-                    return true;
-                }
-                false
-            });
-        }
-    }
-
     async fn init(&self) -> IngestV2Result<()> {
         let mut state_guard = self.state.write().await;
@@ -246,18 +268,6 @@ impl Ingester {
         Ok(entry.or_insert(shard))
     }
 
-    async fn close_shards_inner(&self, state: &mut IngesterState, queue_ids: &[QueueId]) {
-        for queue_id in queue_ids {
-            append_eof_record_if_necessary(&mut state.mrecordlog, queue_id).await;
-
-            if let Some(shard) = state.shards.get_mut(queue_id) {
-                shard.shard_state = ShardState::Closed;
-                shard.notify_new_records();
-            }
-        }
-        // TODO: Handle replicated shards.
-    }
-
     async fn init_replication_stream(
         &self,
         state: &mut IngesterState,
@@ -322,6 +332,7 @@ impl IngesterService for Ingester {
         let mut persist_successes = Vec::with_capacity(persist_request.subrequests.len());
         let mut persist_failures = Vec::new();
         let mut replicate_subrequests: HashMap<NodeId, Vec<ReplicateSubrequest>> = HashMap::new();
+        let mut should_decommission = false;
 
         let commit_type = persist_request.commit_type();
         let force_commit = commit_type == CommitTypeV2::Force;
@@ -350,6 +361,17 @@ impl IngesterService for Ingester {
             return Ok(persist_response);
         }
         for subrequest in persist_request.subrequests {
+            if should_decommission {
+                let persist_failure = PersistFailure {
+                    subrequest_id: subrequest.subrequest_id,
+                    index_uid: subrequest.index_uid,
+                    source_id: subrequest.source_id,
+                    shard_id: subrequest.shard_id,
+                    reason: PersistFailureReason::ShardClosed as i32,
+                };
+                persist_failures.push(persist_failure);
+                continue;
+            }
             let queue_id = subrequest.queue_id();
             let follower_id_opt: Option<NodeId> = subrequest.follower_id.map(Into::into);
             let shard = if let Some(shard) = state_guard.shards.get_mut(&queue_id) {
@@ -435,25 +457,41 @@ impl IngesterService for Ingester {
                 persist_failures.push(persist_failure);
                 continue;
             }
-            let current_position_inclusive: Position = if force_commit {
-                let encoded_mrecords = doc_batch
-                    .docs()
-                    .map(|doc| MRecord::Doc(doc).encode())
-                    .chain(once(MRecord::Commit.encode()));
-                state_guard
-                    .mrecordlog
-                    .append_records(&queue_id, None, encoded_mrecords)
-                    .await
-                    .expect("TODO") // TODO: Io error, close shard?
-            } else {
-                let encoded_mrecords = doc_batch.docs().map(|doc| MRecord::Doc(doc).encode());
-                state_guard
-                    .mrecordlog
-                    .append_records(&queue_id, None, encoded_mrecords)
-                    .await
-                    .expect("TODO") // TODO: Io error, close shard?
-            }
-            .into();
+            let append_result = append_doc_batch(
+                &mut state_guard.mrecordlog,
+                &queue_id,
+                &doc_batch,
+                force_commit,
+            )
+            .await;
+
+            let current_position_inclusive = match append_result {
+                Ok(current_position_inclusive) => current_position_inclusive,
+                Err(append_error) => {
+                    let reason = match &append_error {
+                        AppendDocBatchError::Io(_) => {
+                            error!(
+                                "failed to append records to shard `{queue_id}`: {append_error}"
+                            );
+                            should_decommission = true;
+                            PersistFailureReason::ShardClosed
+                        }
+                        AppendDocBatchError::QueueNotFound(_) => {
+                            warn!("{append_error}");
+                            PersistFailureReason::ShardNotFound
+                        }
+                    };
+                    let persist_failure = PersistFailure {
+                        subrequest_id: subrequest.subrequest_id,
+                        index_uid: subrequest.index_uid,
+                        source_id: subrequest.source_id,
+                        shard_id: subrequest.shard_id,
+                        reason: reason as i32,
+                    };
+                    persist_failures.push(persist_failure);
+                    continue;
+                }
+            };
             let batch_num_bytes = doc_batch.num_bytes() as u64;
             let batch_num_docs = doc_batch.num_docs() as u64;
 
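
Note: this hunk is the behavioral core of the patch. A WAL append that fails with an IO error no longer panics through `expect("TODO")`; it records the failure as `ShardClosed`, sets `should_decommission`, and every remaining subrequest in the batch is short-circuited with `ShardClosed` as well (see the guard added at the top of the loop), after which the whole ingester decommissions itself once the loop finishes. A missing queue, by contrast, only fails its own subrequest with `ShardNotFound`. Summarized as a mapping (illustrative helper, not part of the patch):

    // Illustrative only: restates the match arms in the hunk above.
    fn reason_for(append_error: &AppendDocBatchError) -> PersistFailureReason {
        match append_error {
            // Treated as fatal for the node: the ingester flags itself for decommissioning.
            AppendDocBatchError::Io(_) => PersistFailureReason::ShardClosed,
            // Local to the subrequest: the shard's queue does not exist on this ingester.
            AppendDocBatchError::QueueNotFound(_) => PersistFailureReason::ShardNotFound,
        }
    }
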
@@ -549,6 +587,7 @@ impl IngesterService for Ingester {
                 // already.
                 let persist_failure_reason = match replicate_failure.reason() {
                     ReplicateFailureReason::Unspecified => PersistFailureReason::Unspecified,
+                    ReplicateFailureReason::ShardNotFound => PersistFailureReason::ShardNotFound,
                     ReplicateFailureReason::ShardClosed => PersistFailureReason::ShardClosed,
                     ReplicateFailureReason::ResourceExhausted => {
                         PersistFailureReason::ResourceExhausted
@@ -564,7 +603,13 @@ impl IngesterService for Ingester {
                 persist_failures.push(persist_failure);
             }
         }
+        if should_decommission {
+            error!("decommissioning ingester after IO error");
+            let mut state_guard = self.state.write().await;
+            state_guard.decommission().await;
+        }
         let leader_id = self.self_node_id.to_string();
+
         let persist_response = PersistResponse {
             leader_id,
             successes: persist_successes,
@@ -683,7 +728,7 @@ impl IngesterService for Ingester {
             .flat_map(|shards| shards.queue_ids())
             .collect();
 
-        self.close_shards_inner(&mut state_guard, &queue_ids).await;
+        state_guard.close_shards(&queue_ids).await;
 
         Ok(CloseShardsResponse {})
     }
@@ -733,7 +778,7 @@ impl IngesterService for Ingester {
                 }
             }
         }
-        self.check_decommissioning_status(&mut state_guard);
+        state_guard.check_decommissioning_status();
         let truncate_response = TruncateShardsResponse {};
         Ok(truncate_response)
     }
@@ -744,12 +789,7 @@ impl IngesterService for Ingester {
     ) -> IngestV2Result<DecommissionResponse> {
         info!("decommissioning ingester");
         let mut state_guard = self.state.write().await;
-
-        let queue_ids: Vec<QueueId> = state_guard.shards.keys().cloned().collect();
-        self.close_shards_inner(&mut state_guard, &queue_ids).await;
-
-        state_guard.status = IngesterStatus::Decommissioning;
-        self.check_decommissioning_status(&mut state_guard);
+        state_guard.decommission().await;
 
         Ok(DecommissionResponse {})
     }
@@ -1836,15 +1876,15 @@ mod tests {
         let (_ingester_ctx, ingester) = IngesterForTest::default().build().await;
         let mut state_guard = ingester.state.write().await;
 
-        ingester.check_decommissioning_status(&mut state_guard);
+        state_guard.check_decommissioning_status();
         assert_eq!(state_guard.status, IngesterStatus::Ready);
         assert_eq!(
             ingester.observation_rx.borrow().as_ref().unwrap().status(),
             IngesterStatus::Ready
         );
 
         state_guard.status = IngesterStatus::Decommissioning;
-        ingester.check_decommissioning_status(&mut state_guard);
+        state_guard.check_decommissioning_status();
         assert_eq!(state_guard.status, IngesterStatus::Decommissioned);
 
         state_guard.status = IngesterStatus::Decommissioning;
@@ -1855,13 +1895,13 @@ mod tests {
             queue_id_01.clone(),
             IngesterShard::new_solo(ShardState::Closed, Position::Eof, Position::Beginning),
         );
-        ingester.check_decommissioning_status(&mut state_guard);
+        state_guard.check_decommissioning_status();
         assert_eq!(state_guard.status, IngesterStatus::Decommissioning);
 
         let shard = state_guard.shards.get_mut(&queue_id_01).unwrap();
         shard.truncation_position_inclusive = Position::Eof;
 
-        ingester.check_decommissioning_status(&mut state_guard);
+        state_guard.check_decommissioning_status();
         assert_eq!(state_guard.status, IngesterStatus::Decommissioned);
         assert_eq!(
             ingester.observation_rx.borrow().as_ref().unwrap().status(),