Skip to content
This repository was archived by the owner on Feb 12, 2024. It is now read-only.

perf: debounce pin flush #2634

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"datastore-core": "~0.7.0",
"datastore-pubsub": "^0.2.1",
"debug": "^4.1.0",
"delay": "^4.1.0",
"dlv": "^1.1.3",
"err-code": "^2.0.0",
"explain-error": "^1.0.4",
Expand Down Expand Up @@ -199,7 +200,6 @@
"async": "^2.6.3",
"base64url": "^3.0.1",
"clear-module": "^4.0.0",
"delay": "^4.1.0",
"detect-node": "^2.0.4",
"dir-compare": "^1.7.3",
"execa": "^3.0.0",
Expand Down
93 changes: 58 additions & 35 deletions src/core/components/pin/pin-manager.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ const multicodec = require('multicodec')
const dagCborLinks = require('dag-cbor-links')
const debug = require('debug')
const { cidToString } = require('../../../utils/cid')

const delay = require('delay')
const AbortController = require('abort-controller')
const createPinSet = require('./pin-set')

const { Errors } = require('interface-datastore')
Expand Down Expand Up @@ -106,43 +107,65 @@ class PinManager {
// Encode and write pin key sets to the datastore:
// a DAGLink for each of the recursive and direct pinsets
// a DAGNode holding those as DAGLinks, a kind of root pin
async flushPins () {
const [
dLink,
rLink
] = await Promise.all([
// create a DAGLink to the node with direct pins
this.pinset.storeSet(this.directKeys())
.then((result) => {
return new DAGLink(PinTypes.direct, result.node.size, result.cid)
}),
// create a DAGLink to the node with recursive pins
this.pinset.storeSet(this.recursiveKeys())
.then((result) => {
return new DAGLink(PinTypes.recursive, result.node.size, result.cid)
}),
// the pin-set nodes link to a special 'empty' node, so make sure it exists
this.dag.put(new DAGNode(Buffer.alloc(0)), {
version: 0,
format: multicodec.DAG_PB,
hashAlg: multicodec.SHA2_256,
preload: false
})
])
async flushPins () { // eslint-disable-line require-await
if (this._flushingPins) {
this._flushingPins.controller.abort()
}

// create a root node with DAGLinks to the direct and recursive DAGs
const rootNode = new DAGNode(Buffer.alloc(0), [dLink, rLink])
const rootCid = await this.dag.put(rootNode, {
version: 0,
format: multicodec.DAG_PB,
hashAlg: multicodec.SHA2_256,
preload: false
})
const controller = new AbortController()

this._flushingPins = {
controller,
promise: delay(100)
.then(async () => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How come this call has a .then().catch()? We should stick to await as much as possible.

promise: (async () => {
  await delay(100)
  if (controller.signal.aborted) return
  // ...
})()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personal preference I guess? I've always found IIFEs a bit, well iffy to look at.

if (controller.signal.aborted) {
return
}

// save root to datastore under a consistent key
await this.repo.datastore.put(PIN_DS_KEY, rootCid.buffer)
this._flushingPins = null

const [
dLink,
rLink
] = await Promise.all([
// create a DAGLink to the node with direct pins
this.pinset.storeSet(this.directKeys())
.then((result) => {
return new DAGLink(PinTypes.direct, result.node.size, result.cid)
}),
// create a DAGLink to the node with recursive pins
this.pinset.storeSet(this.recursiveKeys())
.then((result) => {
return new DAGLink(PinTypes.recursive, result.node.size, result.cid)
}),
// the pin-set nodes link to a special 'empty' node, so make sure it exists
this.dag.put(new DAGNode(Buffer.alloc(0)), {
version: 0,
format: multicodec.DAG_PB,
hashAlg: multicodec.SHA2_256,
preload: false
})
])

// create a root node with DAGLinks to the direct and recursive DAGs
const rootNode = new DAGNode(Buffer.alloc(0), [dLink, rLink])

const rootCid = await this.dag.put(rootNode, {
version: 0,
format: multicodec.DAG_PB,
hashAlg: multicodec.SHA2_256,
preload: false
})

// save root to datastore under a consistent key
await this.repo.datastore.put(PIN_DS_KEY, rootCid.buffer)

this.log(`Flushed pins with root: ${rootCid}`)
})
.catch(err => this.log(`Could not flush pins: ${err}`))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please let us not smother the error!

Secondly, if the flush does fail, any debounced calls succeed...and they succeed before the flush has been performed so there's no way to know if an error happens.

Could we use something like debounce-promise? It would mean the debounced calls would have to wait for the debounce run so perhaps this wouldn't exhibit as much perf improvement but would still be good.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. If I am importing files A, B and C, and we block on flushing pins after importing A, then B will not start to import until A has finished and the pins have flushed, debounced or otherwise so there's no benefit to be gained.

This PR works by doing the flushing in another async context but as you point out, there's no way to know if an error occurred, because the only way to know is to block until the operation has completed, which means the next file in the list will not get imported until the pins have flushed.

}

this.log(`Flushed pins with root: ${rootCid}`)
return this._flushingPins.promise
}

async load () {
Expand Down