// floresta_wire/p2p_wire/node/sync_ctx.rs
// SPDX-License-Identifier: MIT OR Apache-2.0

//! A node that downloads and validates the blockchain.

use std::time::Duration;
use std::time::Instant;

use bitcoin::p2p::ServiceFlags;
use floresta_chain::proof_util;
use floresta_chain::ThreadSafeChain;
use floresta_common::service_flags;
use rand::seq::IteratorRandom;
use rand::thread_rng;
use tokio::time;
use tokio::time::MissedTickBehavior;
use tracing::debug;
use tracing::info;

use crate::node::periodic_job;
use crate::node::try_and_log;
use crate::node::ConnectionKind;
use crate::node::InflightRequests;
use crate::node::NodeNotification;
use crate::node::NodeRequest;
use crate::node::UtreexoNode;
use crate::node_context::LoopControl;
use crate::node_context::NodeContext;
use crate::p2p_wire::error::WireError;
use crate::p2p_wire::peer::PeerMessages;

/// Context type that makes a [`UtreexoNode`] download and validate the
/// blockchain (i.e. the node used during initial block download).
///
/// All context-specific behavior lives in this type's [`NodeContext`] impl and
/// in the `UtreexoNode<Chain, SyncNode>` methods below; see
/// [node_context](crates/floresta-wire/src/p2p_wire/node_context.rs) and
/// [node.rs](crates/floresta-wire/src/p2p_wire/node.rs) for more information.
#[derive(Clone, Debug, Default)]
pub struct SyncNode {}

impl NodeContext for SyncNode {
    /// Get the required [services](ServiceFlags) for the [`SyncNode`].
    ///
    /// The [`SyncNode`] is active during IBD, and therefore requires that peers support:
    ///   * `NETWORK`: the peer is capable of serving the entire blockchain.
    ///   * `WITNESS`: the peer is capable of serving blocks and transactions with witness data.
    ///   * `UTREEXO_ARCHIVE`: the peer is capable of serving inclusion proofs for the entire blockchain.
    fn get_required_services(&self) -> ServiceFlags {
        ServiceFlags::NETWORK | ServiceFlags::WITNESS | service_flags::UTREEXO_ARCHIVE.into()
    }

    /// Timeout for inflight requests, in seconds.
    const REQUEST_TIMEOUT: u64 = 60 * 2; // 2 minutes

    /// Maximum number of simultaneous inflight requests we allow.
    const MAX_INFLIGHT_REQUESTS: usize = 100; // double the default

    // A more conservative value than the default of 1 second, since we'll have many peer messages
    const MAINTENANCE_TICK: Duration = Duration::from_secs(5);
}

/// Node methods for a [`UtreexoNode`] where its Context is a [`SyncNode`].
/// See [node](crates/floresta-wire/src/p2p_wire/node.rs) for more information.
impl<Chain> UtreexoNode<Chain, SyncNode>
where
    Chain: ThreadSafeChain,
    WireError: From<Chain::Error>,
    Chain::Error: From<proof_util::UtreexoLeafError>,
{
    /// Computes the next blocks to request, and sends a GETDATA request
    ///
    /// We send block requests in batches of `BLOCKS_PER_GETDATA` blocks, and we
    /// can have up to `MAX_CONCURRENT_GETDATA` such batches inflight at once.
    ///
    /// This function sends exactly one GETDATA, therefore asking for one batch
    /// of blocks. It will compute the next blocks we need, given our tip,
    /// validation index, inflight requests and cached blocks. We then select
    /// a random peer and send the request.
    ///
    /// TODO: Be smarter when selecting peers to send, like taking in consideration
    /// already inflight blocks and latency.
    fn get_blocks_to_download(&mut self) {
        let max_inflight_blocks = SyncNode::BLOCKS_PER_GETDATA * SyncNode::MAX_CONCURRENT_GETDATA;
        let inflight_blocks = self
            .inflight
            .keys()
            .filter(|inflight| matches!(inflight, InflightRequests::Blocks(_)))
            .count();

        // blocks we've asked for but not yet validated: either still inflight,
        // or downloaded and waiting in our cache (`self.blocks`)
        let unprocessed_blocks = inflight_blocks + self.blocks.len();

        // if we do a request, this will be the new inflight blocks count
        let next_unprocessed_count = unprocessed_blocks + SyncNode::BLOCKS_PER_GETDATA;

        // if this request would make our inflight queue too long, postpone it
        if next_unprocessed_count > max_inflight_blocks {
            return;
        }

        let mut blocks = Vec::with_capacity(SyncNode::BLOCKS_PER_GETDATA);
        for _ in 0..SyncNode::BLOCKS_PER_GETDATA {
            let next_block = self.last_block_request + 1;
            let validation_index = self.chain.get_validation_index().unwrap();
            if next_block <= validation_index {
                // Our request pointer fell behind the validation index, so
                // fast-forward it: heights up to `validation_index` are
                // already validated.
                //
                // NOTE(review): `next_block` below still holds the stale,
                // already-validated height, so this iteration asks for the
                // hash of a block we presumably don't need — it looks like
                // `request_blocks` filters such blocks out downstream, but
                // confirm that requesting `validation_index + 1` here wasn't
                // the intent.
                self.last_block_request = validation_index;
            }

            let next_block = self.chain.get_block_hash(next_block);
            match next_block {
                Ok(next_block) => {
                    blocks.push(next_block);
                    self.last_block_request += 1;
                }

                Err(_) => {
                    // this is likely because we've reached the end of the chain
                    // and we've got a `BlockNotPresent` error.
                    break;
                }
            }
        }

        try_and_log!(self.request_blocks(blocks));
    }

    /// Re-requests every block between our validation index (exclusive) and
    /// the last block we asked for (inclusive). `request_blocks` filters out
    /// blocks that are still inflight or already cached, so only blocks that
    /// actually went missing get requested again.
    fn ask_for_missed_blocks(&mut self) -> Result<(), WireError> {
        let next_request = self.chain.get_validation_index()? + 1;
        let last_block_requested = self.last_block_request;

        // we accumulate the hashes of all blocks in [next_request, last_block_requested] here
        // and pass it to request_blocks, which will filter inflight and pending blocks out.
        let mut range_blocks = Vec::new();

        for request_height in next_request..=last_block_requested {
            let block_hash = self.chain.get_block_hash(request_height)?;
            range_blocks.push(block_hash);
        }

        self.request_blocks(range_blocks)
    }

    /// This function will periodically check our connections, to ensure that:
    ///   - we have enough utreexo peers to download proofs from (at least 2)
    ///   - we have enough peers to download blocks from (at most `MAX_OUTGOING_PEERS`)
    ///   - if some of our peers are too slow, and potentially stalling our block download (TODO)
    fn check_connections(&mut self) -> Result<(), WireError> {
        let total_peers = self.connected_peers();
        let utreexo_peers = self
            .peer_by_service
            .get(&service_flags::UTREEXO.into())
            .map_or(0, |peers| peers.len());

        if utreexo_peers < 2 && total_peers >= SyncNode::MAX_OUTGOING_PEERS {
            // if we have more than the maximum number of outgoing peers, disconnect
            // some non-utreexo peers.
            //
            // FIXME: We should actually disconnect the slowest non-utreexo peer, to
            // make sure we can download blocks faster.
            self.peers
                .values()
                .filter(|peer| {
                    peer.is_regular_peer() && !peer.services.has(service_flags::UTREEXO.into())
                })
                .choose(&mut thread_rng())
                // a failed send just means the peer is already gone; ignore it
                .and_then(|p| p.channel.send(NodeRequest::Shutdown).ok());
        }

        if utreexo_peers < 2 {
            info!("Not enough utreexo peers (we have {utreexo_peers}), opening a new connection");
            self.maybe_open_connection(service_flags::UTREEXO.into())?;
        }

        // also keep the pool of regular block-serving peers topped up
        self.maybe_open_connection(ServiceFlags::NETWORK)
    }

    /// Starts the sync node by updating the last block requested and starting the main loop.
    ///
    /// Each iteration of the loop either runs the periodic maintenance tick
    /// (which has priority, but fires only every `MAINTENANCE_TICK`) or
    /// handles the messages our peers sent us through the `node_rx` channel,
    /// draining any queued ones. The loop breaks when:
    ///     - the maintenance tick asks for it (kill signal set, or IBD finished), or
    ///     - the kill signal is set after handling a batch of messages, or
    ///     - the peer message channel is closed.
    pub async fn run(mut self, done_cb: impl FnOnce(&Chain)) -> Self {
        info!("Starting sync node...");
        // start requesting right after the last validated block
        self.last_block_request = self.chain.get_validation_index().unwrap();

        let mut ticker = time::interval(SyncNode::MAINTENANCE_TICK);
        // If we fall behind, don't "catch up" by running maintenance repeatedly
        ticker.set_missed_tick_behavior(MissedTickBehavior::Skip);

        loop {
            tokio::select! {
                biased;

                // Maintenance runs only on tick but has priority
                _ = ticker.tick() => match self.maintenance_tick().await {
                    LoopControl::Continue => {},
                    LoopControl::Break => break,
                },

                // Handle messages as soon as we find any, otherwise sleep until maintenance
                msg = self.node_rx.recv() => {
                    let Some(msg) = msg else {
                        // channel closed: every sender is gone, nothing left to do
                        break;
                    };
                    try_and_log!(self.handle_message(msg).await);

                    // Drain all queued messages
                    while let Ok(msg) = self.node_rx.try_recv() {
                        try_and_log!(self.handle_message(msg).await);
                    }
                    if *self.kill_signal.read().await {
                        break;
                    }
                }
            }
        }

        // let the caller observe the final chain state before we return
        done_cb(&self.chain);
        self
    }

    /// Performs the periodic maintenance tasks, including checking for the cancel signal, peer
    /// connections, and inflight request timeouts.
    ///
    /// Returns `LoopControl::Break` if we need to break the main `SyncNode` loop, either because
    /// the kill signal was set or because the chain is synced.
    async fn maintenance_tick(&mut self) -> LoopControl {
        if *self.kill_signal.read().await {
            return LoopControl::Break;
        }

        let validation_index = self
            .chain
            .get_validation_index()
            .expect("validation index block should present");

        let best_block = self
            .chain
            .get_best_block()
            .expect("best block should present")
            .0;

        // everything up to our best known block is validated: IBD is done
        if validation_index == best_block {
            info!("IBD is finished, switching to normal operation mode");
            self.chain.toggle_ibd(false);
            return LoopControl::Break;
        }

        periodic_job!(
            self.last_connection => self.check_connections(),
            SyncNode::TRY_NEW_CONNECTION,
            no_log,
        );

        // Open new feeler connection periodically
        periodic_job!(
            self.last_feeler => self.open_feeler_connection(),
            SyncNode::FEELER_INTERVAL,
            no_log,
        );

        try_and_log!(self.check_for_timeout());

        // if our tip hasn't moved for ASSUME_STALE seconds, our current peers
        // may be stalling our download
        let assume_stale = Instant::now()
            .duration_since(self.common.last_tip_update)
            .as_secs()
            > SyncNode::ASSUME_STALE;

        if assume_stale {
            // try an extra connection, and reset the timer so we don't open
            // another one on every tick while still stale
            try_and_log!(self.create_connection(ConnectionKind::Extra));
            self.last_tip_update = Instant::now();
            return LoopControl::Continue;
        }

        try_and_log!(self.process_pending_blocks());
        // without utreexo peers there is no one to serve us proofs, so hold
        // off on requesting more blocks until we get some connections
        if !self.has_utreexo_peers() {
            return LoopControl::Continue;
        }

        // Ask for missed blocks or proofs if they are no longer inflight or pending
        try_and_log!(self.ask_for_missed_blocks());
        try_and_log!(self.ask_for_missed_proofs());

        self.get_blocks_to_download();
        LoopControl::Continue
    }

    /// Process a message from a peer and handle it accordingly between the variants of [`PeerMessages`].
    async fn handle_message(&mut self, msg: NodeNotification) -> Result<(), WireError> {
        match msg {
            NodeNotification::FromUser(request, responder) => {
                self.perform_user_request(request, responder).await;
            }

            NodeNotification::DnsSeedAddresses(addresses) => {
                self.address_man.push_addresses(&addresses);
            }

            NodeNotification::FromPeer(peer, notification, time) => {
                // record when this message arrived for this peer
                self.register_message_time(&notification, peer, time);

                // the common handler deals with the boilerplate messages; we
                // only process what it didn't consume
                let Some(unhandled) = self.handle_peer_msg_common(notification, peer)? else {
                    return Ok(());
                };

                match unhandled {
                    PeerMessages::Block(block) => {
                        let hash = block.block_hash();
                        if self.blocks.contains_key(&hash) {
                            debug!(
                                "Received block {hash} from peer {peer}, but we already have it"
                            );
                            return Ok(());
                        }

                        // ask for the utreexo proof that goes with this block
                        self.request_block_proof(block, peer)?;

                        self.process_pending_blocks()?;
                        self.get_blocks_to_download();
                    }

                    PeerMessages::Ready(version) => {
                        try_and_log!(self.handle_peer_ready(peer, version));
                    }

                    PeerMessages::Disconnected(idx) => {
                        try_and_log!(self.handle_disconnection(peer, idx));
                    }

                    PeerMessages::UtreexoProof(uproof) => {
                        // attach the proof to its cached block, then try to
                        // process whatever is now complete, and refill the
                        // download queue
                        self.attach_proof(uproof, peer)?;
                        self.process_pending_blocks()?;
                        self.get_blocks_to_download();
                    }

                    _ => {}
                }
            }
        }

        Ok(())
    }
}