From eb4ab1f54b962c1a1813576f3f35adb03129edfb Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 22 Apr 2019 18:47:11 +0800 Subject: [PATCH 01/56] feat: implement ipfs refs --- test/fixtures/test-data/refs/animals/land/african.txt | 2 ++ test/fixtures/test-data/refs/animals/land/americas.txt | 2 ++ test/fixtures/test-data/refs/animals/land/australian.txt | 2 ++ test/fixtures/test-data/refs/animals/sea/atlantic.txt | 2 ++ test/fixtures/test-data/refs/animals/sea/indian.txt | 2 ++ test/fixtures/test-data/refs/atlantic-animals | 1 + test/fixtures/test-data/refs/fruits/tropical.txt | 2 ++ test/fixtures/test-data/refs/mushroom.txt | 1 + test/utils/ipfs-exec.js | 7 ++----- 9 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 test/fixtures/test-data/refs/animals/land/african.txt create mode 100644 test/fixtures/test-data/refs/animals/land/americas.txt create mode 100644 test/fixtures/test-data/refs/animals/land/australian.txt create mode 100644 test/fixtures/test-data/refs/animals/sea/atlantic.txt create mode 100644 test/fixtures/test-data/refs/animals/sea/indian.txt create mode 120000 test/fixtures/test-data/refs/atlantic-animals create mode 100644 test/fixtures/test-data/refs/fruits/tropical.txt create mode 100644 test/fixtures/test-data/refs/mushroom.txt diff --git a/test/fixtures/test-data/refs/animals/land/african.txt b/test/fixtures/test-data/refs/animals/land/african.txt new file mode 100644 index 0000000000..29decfcd50 --- /dev/null +++ b/test/fixtures/test-data/refs/animals/land/african.txt @@ -0,0 +1,2 @@ +elephant +rhinocerous \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/land/americas.txt b/test/fixtures/test-data/refs/animals/land/americas.txt new file mode 100644 index 0000000000..21368a871d --- /dev/null +++ b/test/fixtures/test-data/refs/animals/land/americas.txt @@ -0,0 +1,2 @@ +ñandu +tapir \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/land/australian.txt b/test/fixtures/test-data/refs/animals/land/australian.txt new file mode 100644 index 0000000000..a78c7bc9c3 --- /dev/null +++ b/test/fixtures/test-data/refs/animals/land/australian.txt @@ -0,0 +1,2 @@ +emu +kangaroo \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/sea/atlantic.txt b/test/fixtures/test-data/refs/animals/sea/atlantic.txt new file mode 100644 index 0000000000..f77ffe5119 --- /dev/null +++ b/test/fixtures/test-data/refs/animals/sea/atlantic.txt @@ -0,0 +1,2 @@ +dolphin +whale \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/sea/indian.txt b/test/fixtures/test-data/refs/animals/sea/indian.txt new file mode 100644 index 0000000000..c455106f6c --- /dev/null +++ b/test/fixtures/test-data/refs/animals/sea/indian.txt @@ -0,0 +1,2 @@ +cuttlefish +octopus \ No newline at end of file diff --git a/test/fixtures/test-data/refs/atlantic-animals b/test/fixtures/test-data/refs/atlantic-animals new file mode 120000 index 0000000000..670958bfa8 --- /dev/null +++ b/test/fixtures/test-data/refs/atlantic-animals @@ -0,0 +1 @@ +animals/sea/atlantic.txt \ No newline at end of file diff --git a/test/fixtures/test-data/refs/fruits/tropical.txt b/test/fixtures/test-data/refs/fruits/tropical.txt new file mode 100644 index 0000000000..4f331bc7d2 --- /dev/null +++ b/test/fixtures/test-data/refs/fruits/tropical.txt @@ -0,0 +1,2 @@ +banana +pineapple \ No newline at end of file diff --git a/test/fixtures/test-data/refs/mushroom.txt b/test/fixtures/test-data/refs/mushroom.txt new file mode 100644 index 
0000000000..8b248aa9c8
--- /dev/null
+++ b/test/fixtures/test-data/refs/mushroom.txt
@@ -0,0 +1 @@
+mushroom
\ No newline at end of file
diff --git a/test/utils/ipfs-exec.js b/test/utils/ipfs-exec.js
index 2e764632c5..56749ee2fe 100644
--- a/test/utils/ipfs-exec.js
+++ b/test/utils/ipfs-exec.js
@@ -7,6 +7,7 @@ const expect = chai.expect
 chai.use(dirtyChai)
 const path = require('path')
 const _ = require('lodash')
+const yargs = require('yargs')

 // This is our new test utility to easily check and execute ipfs cli commands.
 //
@@ -33,11 +34,7 @@ module.exports = (repoPath, opts) => {
   }))

   const execute = (exec, args) => {
-    if (args.length === 1) {
-      args = args[0].split(' ')
-    }
-
-    const cp = exec(args)
+    const cp = exec(yargs('-- ' + args[0]).argv._)
     const res = cp.then((res) => {
       // We can't escape the os.tmpdir warning due to:
       // https://github.com/shelljs/shelljs/blob/master/src/tempdir.js#L43

From 761e30539c1237b22d3c59edd80c3d2ade585b5c Mon Sep 17 00:00:00 2001
From: Dirk McCormick
Date: Wed, 24 Apr 2019 14:03:46 +0800
Subject: [PATCH 02/56] feat: refs support in http api

---
 .../files-regular/refs-pull-stream.js    | 39 +++++++++++++++++
 src/http/api/resources/files-regular.js  | 42 +++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js
index aec4ba8a6d..c9e81b7dae 100644
--- a/src/core/components/files-regular/refs-pull-stream.js
+++ b/src/core/components/files-regular/refs-pull-stream.js
@@ -178,3 +178,42 @@ function getNodeLinks (node, path = '') {
   }
   return links
 }
+
+// Get links as a DAG Object
+// { <parentPath>: [link2, link3, link4], <parentPath2>: [...] }
+function getLinkDAG (links) {
+  const linkNames = {}
+  for (const link of links) {
+    linkNames[link.name] = link
+  }
+
+  const linkDAG = {}
+  for (const link of links) {
+    const parentName = link.path.substring(0, link.path.lastIndexOf('/'))
+    linkDAG[parentName] = linkDAG[parentName] || []
+    linkDAG[parentName].push(link)
+  }
+  return linkDAG
+}
+
+// Recursively get refs for a link
+function getRefs (linkDAG, link, format, uniques) {
+  let refs = []
+  const children = linkDAG[link.path] || []
+  for (const child of children) {
+    if (!uniques || !uniques.has(child.hash)) {
+      uniques && uniques.add(child.hash)
+      refs.push(formatLink(link, child, format))
+      refs = refs.concat(getRefs(linkDAG, child, format, uniques))
+    }
+  }
+  return refs
+}
+
+// Get formatted link
+function formatLink (src, dst, format) {
+  let out = format.replace(/<src>/g, src.hash)
+  out = out.replace(/<dst>/g, dst.hash)
+  out = out.replace(/<linkname>/g, dst.name)
+  return out
+}
diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js
index dbd1f71a53..f2d22271b5 100644
--- a/src/http/api/resources/files-regular.js
+++ b/src/http/api/resources/files-regular.js
@@ -320,6 +320,48 @@ exports.ls = {
   }
 }

+exports.refs = {
+  validate: {
+    query: Joi.object().keys({
+      r: Joi.boolean().default(false),
+      recursive: Joi.boolean().default(false),
+      format: Joi.string().default(Format.default),
+      e: Joi.boolean().default(false),
+      edges: Joi.boolean().default(false),
+      u: Joi.boolean().default(false),
+      unique: Joi.boolean().default(false),
+      'max-depth': Joi.number().integer().min(-1),
+      maxDepth: Joi.number().integer().min(-1)
+    }).unknown()
+  },
+
+  // uses common parseKey method that returns a `key`
+  parseArgs: exports.parseKey,
+
+  // main route handler which is called after the above `parseArgs`, but only if the args were valid
+  async handler (request, h) {
+    const { ipfs } = request.server.app
+    const { key } = request.pre.args
+    const recursive = request.query.r === 'true' || request.query.recursive === 'true'
+    const format = request.query.format
+    const e = request.query.e === 'true' || request.query.edges === 'true'
+    const u = request.query.u === 'true' || request.query.unique === 'true'
+    let maxDepth = request.query['max-depth'] || request.query.maxDepth
+    if (typeof maxDepth === 'string') {
+      maxDepth = parseInt(maxDepth)
+    }
+
+    let refs
+    try {
+      refs = await ipfs.refs(key, { recursive, format, e, u, maxDepth })
+    } catch (err) {
+      throw Boom.boomify(err, { message: 'Failed to get refs for path' })
+    }
+
+    return h.response(refs)
+  }
+}
+
 function toTypeCode (type) {
   switch (type) {
     case 'dir':

From 2f632ece30b30513770929a0db7efb89e934212f Mon Sep 17 00:00:00 2001
From: Dirk McCormick
Date: Sun, 28 Apr 2019 23:11:19 +0800
Subject: [PATCH 03/56] feat: use ipld instead of unix-fs-exporter for refs

---
 .../files-regular/refs-pull-stream.js         | 88 +++++++++++++------
 .../test-data/refs/animals/land/african.txt   |  2 -
 .../test-data/refs/animals/land/americas.txt  |  2 -
 .../refs/animals/land/australian.txt          |  2 -
 .../test-data/refs/animals/sea/atlantic.txt   |  2 -
 .../test-data/refs/animals/sea/indian.txt     |  2 -
 test/fixtures/test-data/refs/atlantic-animals |  1 -
 .../test-data/refs/fruits/tropical.txt        |  2 -
 test/fixtures/test-data/refs/mushroom.txt     |  1 -
 9 files changed, 59 insertions(+), 43 deletions(-)
 delete mode 100644 test/fixtures/test-data/refs/animals/land/african.txt
 delete mode 100644 test/fixtures/test-data/refs/animals/land/americas.txt
 delete mode 100644 test/fixtures/test-data/refs/animals/land/australian.txt
 delete mode 100644 test/fixtures/test-data/refs/animals/sea/atlantic.txt
 delete mode 100644 test/fixtures/test-data/refs/animals/sea/indian.txt
 delete mode 120000 test/fixtures/test-data/refs/atlantic-animals
 delete mode 100644 test/fixtures/test-data/refs/fruits/tropical.txt
 delete mode 100644 test/fixtures/test-data/refs/mushroom.txt

diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js
index c9e81b7dae..15875eb025 100644
--- a/src/core/components/files-regular/refs-pull-stream.js
+++ b/src/core/components/files-regular/refs-pull-stream.js
@@ -12,6 +12,13 @@ const { Format } = require('./refs')

 module.exports = function (self) {
   return function (ipfsPath, options = {}) {
+    setOptionsAlias(options, [
+      ['recursive', 'r'],
+      ['e', 'edges'],
+      ['u', 'unique'],
+      ['maxDepth', 'max-depth']
+    ])
+
     if (options.maxDepth === 0) {
       return pull.empty()
     }
@@ -179,41 +186,64 @@ function getNodeLinks (node, path = '') {
   }
   return links
 }

-// Get links as a DAG Object
-// { <parentPath>: [link2, link3, link4], <parentPath2>: [...] }
-function getLinkDAG (links) {
-  const linkNames = {}
-  for (const link of links) {
-    linkNames[link.name] = link
-  }
+// Do a depth first search of the DAG, starting from the given root cid
+function objectStream (ipfs, rootCid, maxDepth, isUnique) {
+  const uniques = new Set()

-  const linkDAG = {}
-  for (const link of links) {
-    const parentName = link.path.substring(0, link.path.lastIndexOf('/'))
-    linkDAG[parentName] = linkDAG[parentName] || []
-    linkDAG[parentName].push(link)
-  }
-  return linkDAG
-}
+  const root = { node: { cid: rootCid }, depth: 0 }
+  const traverseLevel = (obj) => {
+    const { node, depth } = obj
+
+    // Check the depth
+    const nextLevelDepth = depth + 1
+    if (nextLevelDepth > maxDepth) {
+      return pull.empty()
+    }

-// Recursively get refs for a link
-function getRefs (linkDAG, link, format, uniques) {
-  let refs = []
-  const children = linkDAG[link.path] || []
-  for (const child of children) {
-    if (!uniques || !uniques.has(child.hash)) {
-      uniques && uniques.add(child.hash)
-      refs.push(formatLink(link, child, format))
-      refs = refs.concat(getRefs(linkDAG, child, format, uniques))
+    // If unique option is enabled, check if the CID has been seen before.
+    // Note we need to do this here rather than before adding to the stream
+    // so that the unique check happens in the order that items are examined
+    // in the DAG.
+    if (isUnique) {
+      if (uniques.has(node.cid.toString())) {
+        // Mark this object as a duplicate so we can filter it out later
+        obj.isDuplicate = true
+        return pull.empty()
+      }
+      uniques.add(node.cid.toString())
     }
+
+    const deferred = pullDefer.source()
+
+    // Get this object's links
+    ipfs.object.links(node.cid, (err, links) => {
+      if (err) {
+        if (err.code === 'ERR_NOT_FOUND') {
+          err.message = `Could not find object with CID: ${node.cid}`
+        }
+        return deferred.resolve(pull.error(err))
+      }
+
+      // Add to the stream each link, parent and the new depth
+      const vals = links.map(link => ({
+        parent: node,
+        node: link,
+        depth: nextLevelDepth
+      }))
+
+      deferred.resolve(pull.values(vals))
+    })
+
+    return deferred
   }
-  return refs
+
+  return pullTraverse.depthFirst(root, traverseLevel)
 }

 // Get formatted link
-function formatLink (src, dst, format) {
-  let out = format.replace(/<src>/g, src.hash)
-  out = out.replace(/<dst>/g, dst.hash)
-  out = out.replace(/<linkname>/g, dst.name)
+function formatLink (srcCid, dstCid, linkName, format) {
+  let out = format.replace(/<src>/g, srcCid.toString())
+  out = out.replace(/<dst>/g, dstCid.toString())
+  out = out.replace(/<linkname>/g, linkName)
   return out
 }
diff --git a/test/fixtures/test-data/refs/animals/land/african.txt b/test/fixtures/test-data/refs/animals/land/african.txt
deleted file mode 100644
index 29decfcd50..0000000000
--- a/test/fixtures/test-data/refs/animals/land/african.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-elephant
-rhinocerous
\ No newline at end of file
diff --git a/test/fixtures/test-data/refs/animals/land/americas.txt b/test/fixtures/test-data/refs/animals/land/americas.txt
deleted file mode 100644
index 21368a871d..0000000000
--- a/test/fixtures/test-data/refs/animals/land/americas.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-ñandu
-tapir
\ No newline at end of file
diff --git a/test/fixtures/test-data/refs/animals/land/australian.txt b/test/fixtures/test-data/refs/animals/land/australian.txt
deleted file mode 100644
index a78c7bc9c3..0000000000
--- a/test/fixtures/test-data/refs/animals/land/australian.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-emu
-kangaroo
\ No newline at end of file
diff --git a/test/fixtures/test-data/refs/animals/sea/atlantic.txt
b/test/fixtures/test-data/refs/animals/sea/atlantic.txt deleted file mode 100644 index f77ffe5119..0000000000 --- a/test/fixtures/test-data/refs/animals/sea/atlantic.txt +++ /dev/null @@ -1,2 +0,0 @@ -dolphin -whale \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/sea/indian.txt b/test/fixtures/test-data/refs/animals/sea/indian.txt deleted file mode 100644 index c455106f6c..0000000000 --- a/test/fixtures/test-data/refs/animals/sea/indian.txt +++ /dev/null @@ -1,2 +0,0 @@ -cuttlefish -octopus \ No newline at end of file diff --git a/test/fixtures/test-data/refs/atlantic-animals b/test/fixtures/test-data/refs/atlantic-animals deleted file mode 120000 index 670958bfa8..0000000000 --- a/test/fixtures/test-data/refs/atlantic-animals +++ /dev/null @@ -1 +0,0 @@ -animals/sea/atlantic.txt \ No newline at end of file diff --git a/test/fixtures/test-data/refs/fruits/tropical.txt b/test/fixtures/test-data/refs/fruits/tropical.txt deleted file mode 100644 index 4f331bc7d2..0000000000 --- a/test/fixtures/test-data/refs/fruits/tropical.txt +++ /dev/null @@ -1,2 +0,0 @@ -banana -pineapple \ No newline at end of file diff --git a/test/fixtures/test-data/refs/mushroom.txt b/test/fixtures/test-data/refs/mushroom.txt deleted file mode 100644 index 8b248aa9c8..0000000000 --- a/test/fixtures/test-data/refs/mushroom.txt +++ /dev/null @@ -1 +0,0 @@ -mushroom \ No newline at end of file From 7b498fbb4acc6cb1bd439aa3fca121b6f9568997 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 30 Apr 2019 22:42:35 +0800 Subject: [PATCH 04/56] feat: refs local --- src/http/api/resources/files-regular.js | 32 ++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js index f2d22271b5..9253359d42 100644 --- a/src/http/api/resources/files-regular.js +++ b/src/http/api/resources/files-regular.js @@ -320,6 +320,17 @@ exports.ls = { } } +function toTypeCode (type) { + switch (type) { + case 'dir': + return 1 + case 'file': + return 2 + default: + return 0 + } +} + exports.refs = { validate: { query: Joi.object().keys({ @@ -362,14 +373,19 @@ exports.refs = { } } -function toTypeCode (type) { - switch (type) { - case 'dir': - return 1 - case 'file': - return 2 - default: - return 0 +exports.refs.local = { + // main route handler + async handler (request, h) { + const { ipfs } = request.server.app + + let refs + try { + refs = await ipfs.refs.local() + } catch (err) { + throw Boom.boomify(err, { message: 'Failed to get local refs' }) + } + + return h.response(refs) } } From c8e964a30b36342aa38d0dd8ceffa4e1c7927520 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 1 May 2019 19:34:57 +0800 Subject: [PATCH 05/56] feat: add refs.localPullStream && refs.localReadableStream --- src/http/api/resources/files-regular.js | 68 ++++++++++++++++++------- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js index 9253359d42..8b3849f148 100644 --- a/src/http/api/resources/files-regular.js +++ b/src/http/api/resources/files-regular.js @@ -350,7 +350,7 @@ exports.refs = { parseArgs: exports.parseKey, // main route handler which is called after the above `parseArgs`, but only if the args were valid - async handler (request, h) { + handler (request, h) { const { ipfs } = request.server.app const { key } = request.pre.args const recursive = request.query.r === 'true' || request.query.recursive === 'true' @@ 
-362,31 +362,65 @@ exports.refs = { maxDepth = parseInt(maxDepth) } - let refs - try { - refs = await ipfs.refs(key, { recursive, format, e, u, maxDepth }) - } catch (err) { - throw Boom.boomify(err, { message: 'Failed to get refs for path' }) - } - - return h.response(refs) + const source = ipfs.refsPullStream(key, { recursive, format, e, u, maxDepth }) + return sendRefsReplyStream(request, h, `refs for ${key}`, source) } } exports.refs.local = { // main route handler - async handler (request, h) { + handler (request, h) { const { ipfs } = request.server.app + const source = ipfs.refs.localPullStream() + return sendRefsReplyStream(request, h, 'local refs', source) + } +} - let refs - try { - refs = await ipfs.refs.local() - } catch (err) { - throw Boom.boomify(err, { message: 'Failed to get local refs' }) - } +function sendRefsReplyStream (request, h, desc, source) { + const replyStream = pushable() + const aborter = abortable() + + const stream = toStream.source(pull( + replyStream, + aborter, + ndjson.serialize() + )) - return h.response(refs) + // const stream = toStream.source(replyStream.source) + // hapi is not very clever and throws if no + // - _read method + // - _readableState object + // are there :( + if (!stream._read) { + stream._read = () => {} + stream._readableState = {} + stream.unpipe = () => {} } + + pull( + source, + pull.drain( + (ref) => replyStream.push(ref), + (err) => { + if (err) { + request.raw.res.addTrailers({ + 'X-Stream-Error': JSON.stringify({ + Message: `Failed to get ${desc}: ${err.message || ''}`, + Code: 0 + }) + }) + return aborter.abort() + } + + replyStream.end() + } + ) + ) + + return h.response(stream) + .header('x-chunked-output', '1') + .header('content-type', 'application/json') + .header('Trailer', 'X-Stream-Error') } exports.refs = { From 0137caf0cb2a134adac6a6101d9636a28dd3dcd1 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 3 May 2019 23:00:24 +0800 Subject: [PATCH 06/56] feat: make object.links work with CBOR --- src/core/components/object.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/core/components/object.js b/src/core/components/object.js index 87f03075aa..bec9d6fb0f 100644 --- a/src/core/components/object.js +++ b/src/core/components/object.js @@ -107,6 +107,22 @@ function findLinks (node, links = []) { return links } +// Recursively search the node for CIDs +function getNodeLinks (node, path = '') { + let links = [] + for (const [name, value] of Object.entries(node)) { + if (CID.isCID(value)) { + links.push({ + name: path + name, + cid: value + }) + } else if (typeof value === 'object') { + links = links.concat(getNodeLinks(value, path + name + '/')) + } + } + return links +} + module.exports = function object (self) { function editAndSave (edit) { return (multihash, options, callback) => { From f6d7a2ad77ed9ca6b740f4d91fed7b2b84571b79 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 3 May 2019 23:03:03 +0800 Subject: [PATCH 07/56] feat: handle multiple refs. 
Better param handling --- .../files-regular/refs-pull-stream.js | 7 ------ src/http/api/resources/files-regular.js | 22 +++++++------------ 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js index 15875eb025..49953c3653 100644 --- a/src/core/components/files-regular/refs-pull-stream.js +++ b/src/core/components/files-regular/refs-pull-stream.js @@ -12,13 +12,6 @@ const { Format } = require('./refs') module.exports = function (self) { return function (ipfsPath, options = {}) { - setOptionsAlias(options, [ - ['recursive', 'r'], - ['e', 'edges'], - ['u', 'unique'], - ['maxDepth', 'max-depth'] - ]) - if (options.maxDepth === 0) { return pull.empty() } diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js index 8b3849f148..1419abf247 100644 --- a/src/http/api/resources/files-regular.js +++ b/src/http/api/resources/files-regular.js @@ -334,15 +334,11 @@ function toTypeCode (type) { exports.refs = { validate: { query: Joi.object().keys({ - r: Joi.boolean().default(false), recursive: Joi.boolean().default(false), format: Joi.string().default(Format.default), - e: Joi.boolean().default(false), edges: Joi.boolean().default(false), - u: Joi.boolean().default(false), unique: Joi.boolean().default(false), - 'max-depth': Joi.number().integer().min(-1), - maxDepth: Joi.number().integer().min(-1) + 'max-depth': Joi.number().integer().min(-1) }).unknown() }, @@ -353,16 +349,14 @@ exports.refs = { handler (request, h) { const { ipfs } = request.server.app const { key } = request.pre.args - const recursive = request.query.r === 'true' || request.query.recursive === 'true' + + const recursive = request.query.recursive const format = request.query.format - const e = request.query.e === 'true' || request.query.edges === 'true' - const u = request.query.u === 'true' || request.query.unique === 'true' - let maxDepth = request.query['max-depth'] || request.query.maxDepth - if (typeof maxDepth === 'string') { - maxDepth = parseInt(maxDepth) - } + const edges = request.query.edges + const unique = request.query.unique + const maxDepth = request.query['max-depth'] - const source = ipfs.refsPullStream(key, { recursive, format, e, u, maxDepth }) + const source = ipfs.refsPullStream(key, { recursive, format, edges, unique, maxDepth }) return sendRefsReplyStream(request, h, `refs for ${key}`, source) } } @@ -400,7 +394,7 @@ function sendRefsReplyStream (request, h, desc, source) { pull( source, pull.drain( - (ref) => replyStream.push(ref), + (ref) => replyStream.push({ Ref: ref.ref, Err: ref.err }), (err) => { if (err) { request.raw.res.addTrailers({ From 793a355c6e44148dcea860947e407916dc22cc98 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 8 May 2019 17:31:43 +0800 Subject: [PATCH 08/56] feat: GC --- src/cli/commands/repo/gc.js | 31 ++++++-- src/core/components/gc.js | 150 ++++++++++++++++++++++++++++++++++++ src/core/components/repo.js | 7 +- 3 files changed, 177 insertions(+), 11 deletions(-) create mode 100644 src/core/components/gc.js diff --git a/src/cli/commands/repo/gc.js b/src/cli/commands/repo/gc.js index ec3b547e93..0f7bfff29a 100644 --- a/src/cli/commands/repo/gc.js +++ b/src/cli/commands/repo/gc.js @@ -1,16 +1,37 @@ 'use strict' +const { print } = require('../../utils') + module.exports = { command: 'gc', describe: 'Perform a garbage collection sweep on the repo.', - builder: {}, + builder: { + quiet: { + alias: 'q', + desc: 'Write 
minimal output', + type: 'boolean', + default: false + }, + 'stream-errors': { + desc: 'Output individual errors thrown when deleting blocks.', + type: 'boolean', + default: false + } + }, - handler (argv) { - argv.resolve((async () => { - const ipfs = await argv.getIpfs() - await ipfs.repo.gc() + handler ({ getIpfs, quiet, streamErrors, resolve }) { + resolve((async () => { + const ipfs = await getIpfs() + const res = await ipfs.repo.gc() + for (const r of res) { + if (res.err) { + streamErrors && print(res.err, true, true) + } else { + print((quiet ? '' : 'Removed ') + r.cid) + } + } })()) } } diff --git a/src/core/components/gc.js b/src/core/components/gc.js new file mode 100644 index 0000000000..5f93530c04 --- /dev/null +++ b/src/core/components/gc.js @@ -0,0 +1,150 @@ +'use strict' + +const promisify = require('promisify-es6') +const CID = require('cids') +const base32 = require('base32.js') +const parallel = require('async/parallel') +const map = require('async/map') + +module.exports = function gc (self) { + return promisify(async (opts, callback) => { + if (typeof opts === 'function') { + callback = opts + opts = {} + } + + const start = Date.now() + self.log(`GC: Creating set of marked blocks`) + + parallel([ + // Get all blocks from the blockstore + (cb) => self._repo.blocks.query({ keysOnly: true }, cb), + // Mark all blocks that are being used + (cb) => createColoredSet(self, cb) + ], (err, [blocks, coloredSet]) => { + if (err) { + self.log(`GC: Error - ${err.message}`) + return callback(err) + } + + // Delete blocks that are not being used + deleteUnmarkedBlocks(self, coloredSet, blocks, start, (err, res) => { + err && self.log(`GC: Error - ${err.message}`) + callback(err, res) + }) + }) + }) +} + +// TODO: make global constants +const { Key } = require('interface-datastore') +const pinDataStoreKey = new Key('/local/pins') +const MFS_ROOT_KEY = new Key('/local/filesroot') + +function createColoredSet (ipfs, callback) { + parallel([ + // "Empty block" used by the pinner + (cb) => cb(null, ['QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n']), + + // All pins, direct and indirect + (cb) => ipfs.pin.ls((err, pins) => { + if (err) { + return cb(new Error(`Could not list pinned blocks: ${err.message}`)) + } + ipfs.log(`GC: Found ${pins.length} pinned blocks`) + cb(null, pins.map(p => p.hash)) + }), + + // Blocks used internally by the pinner + (cb) => ipfs._repo.datastore.get(pinDataStoreKey, (err, mh) => { + if (err) { + if (err.code === 'ERR_NOT_FOUND') { + ipfs.log(`GC: No pinned blocks`) + return cb(null, []) + } + return cb(new Error(`Could not get pin sets root from datastore: ${err.message}`)) + } + + const cid = new CID(mh) + ipfs.dag.get(cid, '', { preload: false }, (err, obj) => { + // TODO: Handle not found? + if (err) { + return cb(new Error(`Could not get pin sets from store: ${err.message}`)) + } + + // The pinner stores an object that has two links to pin sets: + // 1. The directly pinned CIDs + // 2. 
The recursively pinned CIDs
+        cb(null, [cid.toString(), ...obj.value.links.map(l => l.cid.toString())])
+      })
+    }),
+
+    // The MFS root and all its descendants
+    (cb) => ipfs._repo.datastore.get(MFS_ROOT_KEY, (err, mh) => {
+      if (err) {
+        if (err.code === 'ERR_NOT_FOUND') {
+          ipfs.log(`GC: No blocks in MFS`)
+          return cb(null, [])
+        }
+        return cb(new Error(`Could not get MFS root from datastore: ${err.message}`))
+      }
+
+      getDescendants(ipfs, new CID(mh), cb)
+    })
+  ], (err, res) => callback(err, !err && new Set(res.flat())))
+}
+
+function getDescendants (ipfs, cid, callback) {
+  // TODO: Make sure we don't go out to the network
+  ipfs.refs(cid, { recursive: true }, (err, refs) => {
+    if (err) {
+      return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`))
+    }
+    ipfs.log(`GC: Found ${refs.length} MFS blocks`)
+    callback(null, [cid.toString(), ...refs.map(r => r.ref)])
+  })
+}
+
+function deleteUnmarkedBlocks (ipfs, coloredSet, blocks, start, callback) {
+  // Iterate through all blocks and find those that are not in the marked set
+  // The blocks variable has the form { { key: Key() }, { key: Key() }, ... }
+  const unreferenced = []
+  const res = []
+  for (const { key: k } of blocks) {
+    try {
+      const cid = dsKeyToCid(k)
+      if (!coloredSet.has(cid.toString())) {
+        unreferenced.push(cid)
+      }
+    } catch (err) {
+      res.push({ err: new Error(`Could not convert block with key '${k}' to CID: ${err.message}`) })
+    }
+  }
+
+  const msg = `GC: Marked set has ${coloredSet.size} blocks. Blockstore has ${blocks.length} blocks. ` +
+    `Deleting ${unreferenced.length} blocks.`
+  ipfs.log(msg)
+
+  // TODO: limit concurrency
+  map(unreferenced, (cid, cb) => {
+    // Delete blocks from blockstore
+    ipfs._repo.blocks.delete(cid, (err) => {
+      const res = {
+        cid: cid.toString(),
+        err: err && new Error(`Could not delete block with CID ${cid}: ${err.message}`)
+      }
+      cb(null, res)
+    })
+  }, (_, delRes) => {
+    ipfs.log(`GC: Complete (${Date.now() - start}ms)`)
+
+    callback(null, res.concat(delRes))
+  })
+}
+
+function dsKeyToCid (key) {
+  // Block key is of the form /<base32 encoded multihash>
+  const decoder = new base32.Decoder()
+  const buff = decoder.write(key.toString().slice(1)).finalize()
+  return new CID(buff)
+}
diff --git a/src/core/components/repo.js b/src/core/components/repo.js
index 23116d8cf5..d88ad887a3 100644
--- a/src/core/components/repo.js
+++ b/src/core/components/repo.js
@@ -39,12 +39,7 @@ module.exports = function repo (self) {
     }),

     gc: promisify((options, callback) => {
-      if (typeof options === 'function') {
-        callback = options
-        options = {}
-      }
-
-      callback(new Error('Not implemented'))
+      require('./gc')(self)(options, callback)
     }),

     stat: promisify((options, callback) => {

From 719a9f99148ca02bf377fe9bb77cf6a74d508281 Mon Sep 17 00:00:00 2001
From: Dirk McCormick
Date: Wed, 8 May 2019 23:08:05 +0800
Subject: [PATCH 09/56] chore: add comment to explain cli param parsing

---
 test/utils/ipfs-exec.js | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/utils/ipfs-exec.js b/test/utils/ipfs-exec.js
index 56749ee2fe..7094a1639a 100644
--- a/test/utils/ipfs-exec.js
+++ b/test/utils/ipfs-exec.js
@@ -34,6 +34,9 @@ module.exports = (repoPath, opts) => {
   }))

   const execute = (exec, args) => {
+    // Adding '--' at the front of the command allows us to parse commands that
+    // have a parameter with spaces in it, eg
+    // ipfs refs --format="<src> -> <dst>"
     const cp = exec(yargs('-- ' + args[0]).argv._)
     const res = cp.then((res) => {
       // We can't escape the os.tmpdir warning due to:
       // https://github.com/shelljs/shelljs/blob/master/src/tempdir.js#L43

From
a5db72351baba244af6b2041f43a1f3790558b6e Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 10 May 2019 03:48:31 +0800 Subject: [PATCH 10/56] refactor: move links retrieval from object to refs --- .../files-regular/refs-pull-stream.js | 32 +++++++++++++++---- src/core/components/object.js | 20 +++--------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js index 49953c3653..8447192ae0 100644 --- a/src/core/components/files-regular/refs-pull-stream.js +++ b/src/core/components/files-regular/refs-pull-stream.js @@ -209,7 +209,7 @@ function objectStream (ipfs, rootCid, maxDepth, isUnique) { const deferred = pullDefer.source() // Get this object's links - ipfs.object.links(node.cid, (err, links) => { + getLinks(ipfs, node.cid, (err, links) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { err.message = `Could not find object with CID: ${node.cid}` @@ -233,10 +233,28 @@ function objectStream (ipfs, rootCid, maxDepth, isUnique) { return pullTraverse.depthFirst(root, traverseLevel) } -// Get formatted link -function formatLink (srcCid, dstCid, linkName, format) { - let out = format.replace(//g, srcCid.toString()) - out = out.replace(//g, dstCid.toString()) - out = out.replace(//g, linkName) - return out +// Fetch a node from IPLD then get all its links +function getLinks (ipfs, cid, callback) { + ipfs._ipld.get(new CID(cid), (err, node) => { + if (err) { + return callback(err) + } + callback(null, node.value.links || getNodeLinks(node.value)) + }) +} + +// Recursively search the node for CIDs +function getNodeLinks (node, path = '') { + let links = [] + for (const [name, value] of Object.entries(node)) { + if (CID.isCID(value)) { + links.push({ + name: path + name, + cid: value + }) + } else if (typeof value === 'object') { + links = links.concat(getNodeLinks(value, path + name + '/')) + } + } + return links } diff --git a/src/core/components/object.js b/src/core/components/object.js index bec9d6fb0f..68dc80f22a 100644 --- a/src/core/components/object.js +++ b/src/core/components/object.js @@ -107,22 +107,6 @@ function findLinks (node, links = []) { return links } -// Recursively search the node for CIDs -function getNodeLinks (node, path = '') { - let links = [] - for (const [name, value] of Object.entries(node)) { - if (CID.isCID(value)) { - links.push({ - name: path + name, - cid: value - }) - } else if (typeof value === 'object') { - links = links.concat(getNodeLinks(value, path + name + '/')) - } - } - return links -} - module.exports = function object (self) { function editAndSave (edit) { return (multihash, options, callback) => { @@ -338,6 +322,7 @@ module.exports = function object (self) { return callback(err) } +<<<<<<< HEAD if (cid.codec === 'raw') { return callback(null, []) } @@ -353,6 +338,9 @@ module.exports = function object (self) { } callback(new Error(`Cannot resolve links from codec ${cid.codec}`)) +======= + callback(null, node.links) +>>>>>>> refactor: move links retrieval from object to refs }) }), From ae27eb5295dfa0c941457e4cde4a31bfb42d665a Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 10 May 2019 08:29:54 -0400 Subject: [PATCH 11/56] feat: expose GC to http api --- src/http/api/resources/repo.js | 21 ++++++++++++++++---- src/http/api/routes/repo.js | 8 ++++++++ test/cli/repo.js | 35 ++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/http/api/resources/repo.js 
b/src/http/api/resources/repo.js index 998431111a..1bf14d3a1f 100644 --- a/src/http/api/resources/repo.js +++ b/src/http/api/resources/repo.js @@ -1,9 +1,22 @@ 'use strict' -exports.gc = async (request, h) => { - const { ipfs } = request.server.app - await ipfs.repo.gc() - return h.response() +const Joi = require('joi') + +exports.gc = { + validate: { + query: Joi.object().keys({ + quiet: Joi.boolean().default(false), + 'stream-errors': Joi.boolean().default(false) + }).unknown() + }, + + async handler (request, h) { + const quiet = request.query.quiet + const streamErrors = request.query['stream-errors'] + const { ipfs } = request.server.app + const res = await ipfs.repo.gc({ quiet, streamErrors }) + return h.response(res.map(r => ({ Err: r.err, Key: { '/': r.cid } }))) + } } exports.version = async (request, h) => { diff --git a/src/http/api/routes/repo.js b/src/http/api/routes/repo.js index 21b306e51e..5f2212385c 100644 --- a/src/http/api/routes/repo.js +++ b/src/http/api/routes/repo.js @@ -12,6 +12,14 @@ module.exports = [ method: '*', path: '/api/v0/repo/stat', handler: resources.repo.stat + }, + { + method: '*', + path: '/api/v0/repo/gc', + options: { + validate: resources.repo.gc.validate + }, + handler: resources.repo.gc.handler } // TODO: implement the missing spec https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/REPO.md ] diff --git a/test/cli/repo.js b/test/cli/repo.js index 17c04aaaa3..cbe7acd42b 100644 --- a/test/cli/repo.js +++ b/test/cli/repo.js @@ -2,7 +2,12 @@ 'use strict' const expect = require('chai').expect +const fs = require('fs') +const os = require('os') +const path = require('path') const repoVersion = require('ipfs-repo').repoVersion +const hat = require('hat') +const clean = require('../utils/clean') const runOnAndOff = require('../utils/on-and-off') @@ -18,4 +23,34 @@ describe('repo', () => runOnAndOff((thing) => { expect(out).to.eql(`${repoVersion}\n`) }) }) + + // Note: There are more comprehensive GC tests in interface-js-ipfs-core + it('should run garbage collection', async () => { + // Create and add a file to IPFS + const filePath = path.join(os.tmpdir(), hat()) + const content = String(Math.random()) + fs.writeFileSync(filePath, content) + const cid = (await ipfs(`add -Q ${filePath}`)).trim() + + // File hash should be in refs local + const localRefs = await ipfs('refs local') + expect(localRefs.split('\n')).includes(cid) + + // Run GC, file should not have been removed because it's pinned + const gcOut = await ipfs('repo gc') + expect(gcOut.split('\n')).not.includes('Removed ' + cid) + + // Unpin file + await ipfs('pin rm ' + cid) + + // Run GC, file should now be removed + const gcOutAfterUnpin = await ipfs('repo gc') + expect(gcOutAfterUnpin.split('\n')).to.includes('Removed ' + cid) + + const localRefsAfterGc = await ipfs('refs local') + expect(localRefsAfterGc.split('\n')).not.includes(cid) + + // Clean up file + await clean(filePath) + }) })) From 1e3aedc788a332c007b4078adc3077222287fb59 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 10 May 2019 08:33:37 -0400 Subject: [PATCH 12/56] test: unskip repo gc test --- test/core/interface.spec.js | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/core/interface.spec.js b/test/core/interface.spec.js index b8fd32a2e5..5bdbe3f853 100644 --- a/test/core/interface.spec.js +++ b/test/core/interface.spec.js @@ -157,15 +157,7 @@ describe('interface-ipfs-core tests', function () { } }) - tests.repo(defaultCommonFactory, { - skip: [ - // repo.gc - { - name: 'gc', - 
reason: 'TODO: repo.gc is not implemented in js-ipfs yet!' - } - ] - }) + tests.repo(defaultCommonFactory) tests.stats(defaultCommonFactory) From df86ce416a60db5116eb7972a8fc56e69924b768 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 10 May 2019 09:58:31 -0400 Subject: [PATCH 13/56] fix: refactor and fix some bugs with GC --- src/cli/commands/repo/gc.js | 4 +-- src/core/components/gc.js | 64 ++++++++++++++++++---------------- src/core/components/repo.js | 6 ++-- src/http/api/resources/repo.js | 11 +++--- test/cli/repo.js | 18 +++------- 5 files changed, 51 insertions(+), 52 deletions(-) diff --git a/src/cli/commands/repo/gc.js b/src/cli/commands/repo/gc.js index 0f7bfff29a..d45f4fdd07 100644 --- a/src/cli/commands/repo/gc.js +++ b/src/cli/commands/repo/gc.js @@ -26,8 +26,8 @@ module.exports = { const ipfs = await getIpfs() const res = await ipfs.repo.gc() for (const r of res) { - if (res.err) { - streamErrors && print(res.err, true, true) + if (r.err) { + streamErrors && print(r.err, true, true) } else { print((quiet ? '' : 'Removed ') + r.cid) } diff --git a/src/core/components/gc.js b/src/core/components/gc.js index 5f93530c04..81411a9772 100644 --- a/src/core/components/gc.js +++ b/src/core/components/gc.js @@ -4,15 +4,17 @@ const promisify = require('promisify-es6') const CID = require('cids') const base32 = require('base32.js') const parallel = require('async/parallel') -const map = require('async/map') +const mapLimit = require('async/mapLimit') +const { Key } = require('interface-datastore') -module.exports = function gc (self) { - return promisify(async (opts, callback) => { - if (typeof opts === 'function') { - callback = opts - opts = {} - } +// Limit on the number of parallel block remove operations +const BLOCK_RM_CONCURRENCY = 256 +const PIN_DS_KEY = new Key('/local/pins') +const MFS_ROOT_DS_KEY = new Key('/local/filesroot') +// Perform mark and sweep garbage collection +module.exports = function gc (self) { + return promisify(async (callback) => { const start = Date.now() self.log(`GC: Creating set of marked blocks`) @@ -20,15 +22,15 @@ module.exports = function gc (self) { // Get all blocks from the blockstore (cb) => self._repo.blocks.query({ keysOnly: true }, cb), // Mark all blocks that are being used - (cb) => createColoredSet(self, cb) - ], (err, [blocks, coloredSet]) => { + (cb) => createMarkedSet(self, cb) + ], (err, [blocks, markedSet]) => { if (err) { self.log(`GC: Error - ${err.message}`) return callback(err) } // Delete blocks that are not being used - deleteUnmarkedBlocks(self, coloredSet, blocks, start, (err, res) => { + deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { err && self.log(`GC: Error - ${err.message}`) callback(err, res) }) @@ -36,15 +38,11 @@ module.exports = function gc (self) { }) } -// TODO: make global constants -const { Key } = require('interface-datastore') -const pinDataStoreKey = new Key('/local/pins') -const MFS_ROOT_KEY = new Key('/local/filesroot') - -function createColoredSet (ipfs, callback) { +// Get Set of CIDs of blocks to keep +function createMarkedSet (ipfs, callback) { parallel([ // "Empty block" used by the pinner - (cb) => cb(null, ['QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n']), + (cb) => cb(null, [new CID('QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n')]), // All pins, direct and indirect (cb) => ipfs.pin.ls((err, pins) => { @@ -52,11 +50,11 @@ function createColoredSet (ipfs, callback) { return cb(new Error(`Could not list pinned blocks: ${err.message}`)) } ipfs.log(`GC: Found ${pins.length} 
pinned blocks`) - cb(null, pins.map(p => p.hash)) + cb(null, pins.map(p => new CID(p.hash))) }), // Blocks used internally by the pinner - (cb) => ipfs._repo.datastore.get(pinDataStoreKey, (err, mh) => { + (cb) => ipfs._repo.datastore.get(PIN_DS_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { ipfs.log(`GC: No pinned blocks`) @@ -67,7 +65,6 @@ function createColoredSet (ipfs, callback) { const cid = new CID(mh) ipfs.dag.get(cid, '', { preload: false }, (err, obj) => { - // TODO: Handle not found? if (err) { return cb(new Error(`Could not get pin sets from store: ${err.message}`)) } @@ -75,12 +72,12 @@ function createColoredSet (ipfs, callback) { // The pinner stores an object that has two links to pin sets: // 1. The directly pinned CIDs // 2. The recursively pinned CIDs - cb(null, [cid.toString(), ...obj.value.links.map(l => l.cid.toString())]) + cb(null, [cid, ...obj.value.links.map(l => l.cid)]) }) }), // The MFS root and all its descendants - (cb) => ipfs._repo.datastore.get(MFS_ROOT_KEY, (err, mh) => { + (cb) => ipfs._repo.datastore.get(MFS_ROOT_DS_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { ipfs.log(`GC: No blocks in MFS`) @@ -91,21 +88,29 @@ function createColoredSet (ipfs, callback) { getDescendants(ipfs, new CID(mh), cb) }) - ], (err, res) => callback(err, !err && new Set(res.flat()))) + ], (err, res) => { + if (err) { + return callback(err) + } + + const cids = res.flat().map(cid => cid.toV1().toString('base32')) + return callback(null, new Set(cids)) + }) } +// Recursively get descendants of the given CID function getDescendants (ipfs, cid, callback) { - // TODO: Make sure we don't go out to the network ipfs.refs(cid, { recursive: true }, (err, refs) => { if (err) { return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) } ipfs.log(`GC: Found ${refs.length} MFS blocks`) - callback(null, [cid.toString(), ...refs.map(r => r.ref)]) + callback(null, [cid, ...refs.map(r => new CID(r.ref))]) }) } -function deleteUnmarkedBlocks (ipfs, coloredSet, blocks, start, callback) { +// Delete all blocks that are not marked as in use +function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { // Iterate through all blocks and find those that are not in the marked set // The blocks variable has the form { { key: Key() }, { key: Key() }, ... } const unreferenced = [] @@ -113,7 +118,7 @@ function deleteUnmarkedBlocks (ipfs, coloredSet, blocks, start, callback) { for (const { key: k } of blocks) { try { const cid = dsKeyToCid(k) - if (!coloredSet.has(cid.toString())) { + if (!markedSet.has(cid.toV1().toString('base32'))) { unreferenced.push(cid) } } catch (err) { @@ -121,12 +126,11 @@ function deleteUnmarkedBlocks (ipfs, coloredSet, blocks, start, callback) { } } - const msg = `GC: Marked set has ${coloredSet.size} blocks. Blockstore has ${blocks.length} blocks. ` + + const msg = `GC: Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. 
` + `Deleting ${unreferenced.length} blocks.` ipfs.log(msg) - // TODO: limit concurrency - map(unreferenced, (cid, cb) => { + mapLimit(unreferenced, BLOCK_RM_CONCURRENCY, (cid, cb) => { // Delete blocks from blockstore ipfs._repo.blocks.delete(cid, (err) => { const res = { diff --git a/src/core/components/repo.js b/src/core/components/repo.js index d88ad887a3..5b0914aade 100644 --- a/src/core/components/repo.js +++ b/src/core/components/repo.js @@ -4,6 +4,8 @@ const promisify = require('promisify-es6') const repoVersion = require('ipfs-repo').repoVersion module.exports = function repo (self) { + const gc = require('./gc')(self) + return { init: (bits, empty, callback) => { // 1. check if repo already exists @@ -38,8 +40,8 @@ module.exports = function repo (self) { }) }), - gc: promisify((options, callback) => { - require('./gc')(self)(options, callback) + gc: promisify((callback) => { + gc(callback) }), stat: promisify((options, callback) => { diff --git a/src/http/api/resources/repo.js b/src/http/api/resources/repo.js index 1bf14d3a1f..9ca4734a1c 100644 --- a/src/http/api/resources/repo.js +++ b/src/http/api/resources/repo.js @@ -5,17 +5,20 @@ const Joi = require('joi') exports.gc = { validate: { query: Joi.object().keys({ - quiet: Joi.boolean().default(false), 'stream-errors': Joi.boolean().default(false) }).unknown() }, async handler (request, h) { - const quiet = request.query.quiet const streamErrors = request.query['stream-errors'] const { ipfs } = request.server.app - const res = await ipfs.repo.gc({ quiet, streamErrors }) - return h.response(res.map(r => ({ Err: r.err, Key: { '/': r.cid } }))) + const res = await ipfs.repo.gc() + + const filtered = res.filter(r => !r.err || streamErrors) + const response = filtered.map(r => { + return { Err: r.err, Key: !r.err && { '/': r.cid } } + }) + return h.response(response) } } diff --git a/test/cli/repo.js b/test/cli/repo.js index cbe7acd42b..9fd5d8f19d 100644 --- a/test/cli/repo.js +++ b/test/cli/repo.js @@ -2,15 +2,11 @@ 'use strict' const expect = require('chai').expect -const fs = require('fs') -const os = require('os') -const path = require('path') const repoVersion = require('ipfs-repo').repoVersion -const hat = require('hat') -const clean = require('../utils/clean') - const runOnAndOff = require('../utils/on-and-off') +const fixturePath = 'test/fixtures/planets/solar-system.md' + describe('repo', () => runOnAndOff((thing) => { let ipfs @@ -26,11 +22,8 @@ describe('repo', () => runOnAndOff((thing) => { // Note: There are more comprehensive GC tests in interface-js-ipfs-core it('should run garbage collection', async () => { - // Create and add a file to IPFS - const filePath = path.join(os.tmpdir(), hat()) - const content = String(Math.random()) - fs.writeFileSync(filePath, content) - const cid = (await ipfs(`add -Q ${filePath}`)).trim() + // Add a file to IPFS + const cid = (await ipfs(`add -Q ${fixturePath}`)).trim() // File hash should be in refs local const localRefs = await ipfs('refs local') @@ -49,8 +42,5 @@ describe('repo', () => runOnAndOff((thing) => { const localRefsAfterGc = await ipfs('refs local') expect(localRefsAfterGc.split('\n')).not.includes(cid) - - // Clean up file - await clean(filePath) }) })) From 0d5085decf1091586b941bff3a6a5c2c0be7ac2f Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 20 May 2019 11:47:58 -0400 Subject: [PATCH 14/56] feat: GC locking --- src/core/components/block.js | 23 ++- .../files-regular/add-pull-stream.js | 6 +- src/core/components/gc-lock.js | 102 +++++++++++ 
src/core/components/gc.js | 36 ++-- src/core/components/object.js | 32 ++-- src/core/components/pin.js | 172 +++++++++--------- src/core/components/repo.js | 6 +- src/core/index.js | 3 + 8 files changed, 246 insertions(+), 134 deletions(-) create mode 100644 src/core/components/gc-lock.js diff --git a/src/core/components/block.js b/src/core/components/block.js index 6a1bb960d1..f5a9f610ed 100644 --- a/src/core/components/block.js +++ b/src/core/components/block.js @@ -81,17 +81,19 @@ module.exports = function block (self) { cb(null, new Block(block, cid)) }) }, - (block, cb) => self._blockService.put(block, (err) => { - if (err) { - return cb(err) - } + (block, cb) => self._gcLock.writeLock((_cb) => { + self._blockService.put(block, (err) => { + if (err) { + return _cb(err) + } - if (options.preload !== false) { - self._preload(block.cid) - } + if (options.preload !== false) { + self._preload(block.cid) + } - cb(null, block) - }) + _cb(null, block) + }) + }, cb) ], callback) }), rm: promisify((cid, callback) => { @@ -100,7 +102,8 @@ module.exports = function block (self) { } catch (err) { return setImmediate(() => callback(errCode(err, 'ERR_INVALID_CID'))) } - self._blockService.delete(cid, callback) + + self._gcLock.readLock((cb) => self._blockService.delete(cid, cb), callback) }), stat: promisify((cid, options, callback) => { if (typeof options === 'function') { diff --git a/src/core/components/files-regular/add-pull-stream.js b/src/core/components/files-regular/add-pull-stream.js index 8f46c8913f..d9281d5ac2 100644 --- a/src/core/components/files-regular/add-pull-stream.js +++ b/src/core/components/files-regular/add-pull-stream.js @@ -156,7 +156,9 @@ module.exports = function (self) { } opts.progress = progress - return pull( + + + return self._gcLock.pullReadLock(() => pull( pullMap(content => normalizeContent(content, opts)), pullFlatten(), pullMap(file => ({ @@ -167,6 +169,6 @@ module.exports = function (self) { pullAsyncMap((file, cb) => prepareFile(file, self, opts, cb)), pullMap(file => preloadFile(file, self, opts)), pullAsyncMap((file, cb) => pinFile(file, self, opts, cb)) - ) + )) } } diff --git a/src/core/components/gc-lock.js b/src/core/components/gc-lock.js new file mode 100644 index 0000000000..0d3b5ceca5 --- /dev/null +++ b/src/core/components/gc-lock.js @@ -0,0 +1,102 @@ +'use strict' + +const mortice = require('mortice') +const pull = require('pull-stream') +const log = require('debug')('ipfs:repo:gc:lock') + +class GCLock { + constructor () { + this.mutex = mortice() + } + + readLock (lockedFn, cb) { + return this.lock('readLock', lockedFn, cb) + } + + writeLock (lockedFn, cb) { + return this.lock('writeLock', lockedFn, cb) + } + + lock (type, lockedFn, cb) { + log(`${type} requested`) + const locked = () => new Promise((resolve, reject) => { + log(`${type} started`) + lockedFn((err, res) => err ? 
reject(err) : resolve(res)) + }) + + const lock = this.mutex[type](locked) + return lock.then(res => cb(null, res)).catch(cb).finally(() => { + log(`${type} released`) + }) + } + + pullReadLock (lockedPullFn) { + return this.pullLock('readLock', lockedPullFn) + } + + pullWriteLock (lockedPullFn) { + return this.pullLock('writeLock', lockedPullFn) + } + + pullLock (type, lockedPullFn) { + const pullLocker = new PullLocker(this.mutex, type) + + return pull( + pullLocker.take(), + lockedPullFn(), + pullLocker.release() + ) + } +} + +class PullLocker { + constructor (mutex, type) { + this.mutex = mutex + this.type = type + + // This Promise resolves when the mutex gives us permission to start + // running the locked piece of code + this.lockReady = new Promise((resolve) => { + this.lockReadyResolver = resolve + }) + } + + // Returns a Promise that resolves when the locked piece of code completes + locked () { + return new Promise((resolve) => { + this.releaseLock = resolve + log(`${this.type} (pull) started`) + + // The locked piece of code is ready to start, so resolve the + // this.lockReady Promise (created in the constructor) + this.lockReadyResolver() + }) + } + + // Requests a lock and then waits for the mutex to give us permission to run + // the locked piece of code + take () { + return pull( + pull.asyncMap((i, cb) => { + if (!this.lock) { + log(`${this.type} (pull) requested`) + // Request the lock + this.lock = this.mutex[this.type](() => this.locked()) + } + + // Wait for the mutex to give us permission + this.lockReady.then(() => cb(null, i)) + }) + ) + } + + // Releases the lock + release () { + return pull.through(null, () => { + log(`${this.type} (pull) released`) + this.releaseLock() + }) + } +} + +module.exports = GCLock diff --git a/src/core/components/gc.js b/src/core/components/gc.js index 81411a9772..c2abe39c5a 100644 --- a/src/core/components/gc.js +++ b/src/core/components/gc.js @@ -18,23 +18,28 @@ module.exports = function gc (self) { const start = Date.now() self.log(`GC: Creating set of marked blocks`) - parallel([ - // Get all blocks from the blockstore - (cb) => self._repo.blocks.query({ keysOnly: true }, cb), - // Mark all blocks that are being used - (cb) => createMarkedSet(self, cb) - ], (err, [blocks, markedSet]) => { - if (err) { - self.log(`GC: Error - ${err.message}`) - return callback(err) - } + self._gcLock.writeLock((lockCb) => { + parallel([ + // Get all blocks from the blockstore + (cb) => self._repo.blocks.query({ keysOnly: true }, cb), + // Mark all blocks that are being used + (cb) => createMarkedSet(self, cb) + ], (err, [blocks, markedSet]) => { + if (err) { + self.log(`GC: Error - ${err.message}`) + return lockCb(err) + } - // Delete blocks that are not being used - deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { - err && self.log(`GC: Error - ${err.message}`) - callback(err, res) + // Delete blocks that are not being used + deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { + if (err) { + self.log(`GC: Error - ${err.message}`) + return lockCb(err) + } + lockCb(null, res) + }) }) - }) + }, callback) }) } @@ -42,6 +47,7 @@ module.exports = function gc (self) { function createMarkedSet (ipfs, callback) { parallel([ // "Empty block" used by the pinner + // TODO: This CID is replicated in pin.js (cb) => cb(null, [new CID('QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n')]), // All pins, direct and indirect diff --git a/src/core/components/object.js b/src/core/components/object.js index 68dc80f22a..b7e9fa1e83 100644 
--- a/src/core/components/object.js +++ b/src/core/components/object.js @@ -242,19 +242,21 @@ module.exports = function object (self) { } function next () { - self._ipld.put(node, multicodec.DAG_PB, { - cidVersion: 0, - hashAlg: multicodec.SHA2_256 - }).then( - (cid) => { - if (options.preload !== false) { - self._preload(cid) - } - - callback(null, cid) - }, - (error) => callback(error) - ) + self._gcLock.readLock((cb) => { + self._ipld.put(node, multicodec.DAG_PB, { + cidVersion: 0, + hashAlg: multicodec.SHA2_256 + }).then( + (cid) => { + if (options.preload !== false) { + self._preload(cid) + } + + cb(null, cid) + }, + cb + ) + }, callback) } }), @@ -322,7 +324,6 @@ module.exports = function object (self) { return callback(err) } -<<<<<<< HEAD if (cid.codec === 'raw') { return callback(null, []) } @@ -338,9 +339,6 @@ module.exports = function object (self) { } callback(new Error(`Cannot resolve links from codec ${cid.codec}`)) -======= - callback(null, node.links) ->>>>>>> refactor: move links retrieval from object to refs }) }), diff --git a/src/core/components/pin.js b/src/core/components/pin.js index a41cba72d4..e8cf41b502 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -203,56 +203,56 @@ module.exports = (self) => { resolvePath(self.object, paths, (err, mhs) => { if (err) { return callback(err) } - // verify that each hash can be pinned - map(mhs, (multihash, cb) => { - const cid = new CID(multihash) - const key = cid.toBaseEncodedString() - - if (recursive) { - if (recursivePins.has(key)) { - // it's already pinned recursively - return cb(null, key) - } - - // entire graph of nested links should be pinned, - // so make sure we have all the objects - walkDag({ - dag, - cid, - preload: options.preload - }, (err) => cb(err, key)) - } else { - if (recursivePins.has(key)) { - // recursive supersedes direct, can't have both - return cb(new Error(`${key} already pinned recursively`)) - } - if (directPins.has(key)) { - // already directly pinned - return cb(null, key) - } + self._gcLock.readLock((lockCb) => { + // verify that each hash can be pinned + map(mhs, (multihash, cb) => { + const cid = new CID(multihash) + const key = cid.toBaseEncodedString() + + if (recursive) { + if (recursivePins.has(key)) { + // it's already pinned recursively + return cb(null, key) + } - // make sure we have the object - dag.get(cid, { preload: options.preload }, (err) => { - if (err) { - return cb(err) + // entire graph of nested links should be pinned, + // so make sure we have all the objects + walkDag({ + dag, + cid, + preload: options.preload + }, (err) => cb(err, key)) + } else { + if (recursivePins.has(key)) { + // recursive supersedes direct, can't have both + return cb(new Error(`${key} already pinned recursively`)) + } + if (directPins.has(key)) { + // already directly pinned + return cb(null, key) } - // found the object, we can add the pin - return cb(null, key) - }) - } - }, (err, results) => { - if (err) { return callback(err) } - // update the pin sets in memory - const pinset = recursive ? 
recursivePins : directPins - results.forEach(key => pinset.add(key)) + // make sure we have the object + dag.get(cid, { preload: options.preload }, (err) => { + if (err) { return cb(err) } + // found the object, we can add the pin + return cb(null, key) + }) + } + }, (err, results) => { + if (err) { return lockCb(err) } - // persist updated pin sets to datastore - flushPins((err, root) => { - if (err) { return callback(err) } - callback(null, results.map(hash => ({ hash }))) + // update the pin sets in memory + const pinset = recursive ? recursivePins : directPins + results.forEach(key => pinset.add(key)) + + // persist updated pin sets to datastore + flushPins((err, root) => { + if (err) { return lockCb(err) } + lockCb(null, results.map(hash => ({ hash }))) + }) }) - }) + }, callback) }) }), @@ -274,50 +274,52 @@ module.exports = (self) => { resolvePath(self.object, paths, (err, mhs) => { if (err) { return callback(err) } - // verify that each hash can be unpinned - map(mhs, (multihash, cb) => { - pin._isPinnedWithType(multihash, types.all, (err, res) => { - if (err) { return cb(err) } - const { pinned, reason } = res - const key = toB58String(multihash) - if (!pinned) { - return cb(new Error(`${key} is not pinned`)) - } + self._gcLock.readLock((lockCb) => { + // verify that each hash can be unpinned + map(mhs, (multihash, cb) => { + pin._isPinnedWithType(multihash, types.all, (err, res) => { + if (err) { return cb(err) } + const { pinned, reason } = res + const key = toB58String(multihash) + if (!pinned) { + return cb(new Error(`${key} is not pinned`)) + } - switch (reason) { - case (types.recursive): - if (recursive) { + switch (reason) { + case (types.recursive): + if (recursive) { + return cb(null, key) + } else { + return cb(new Error(`${key} is pinned recursively`)) + } + case (types.direct): return cb(null, key) - } else { - return cb(new Error(`${key} is pinned recursively`)) - } - case (types.direct): - return cb(null, key) - default: - return cb(new Error( - `${key} is pinned indirectly under ${reason}` - )) - } - }) - }, (err, results) => { - if (err) { return callback(err) } - - // update the pin sets in memory - results.forEach(key => { - if (recursive && recursivePins.has(key)) { - recursivePins.delete(key) - } else { - directPins.delete(key) - } - }) + default: + return cb(new Error( + `${key} is pinned indirectly under ${reason}` + )) + } + }) + }, (err, results) => { + if (err) { return lockCb(err) } + + // update the pin sets in memory + results.forEach(key => { + if (recursive && recursivePins.has(key)) { + recursivePins.delete(key) + } else { + directPins.delete(key) + } + }) - // persist updated pin sets to datastore - flushPins((err, root) => { - if (err) { return callback(err) } - self.log(`Removed pins: ${results}`) - callback(null, results.map(hash => ({ hash }))) + // persist updated pin sets to datastore + flushPins((err, root) => { + if (err) { return lockCb(err) } + self.log(`Removed pins: ${results}`) + lockCb(null, results.map(hash => ({ hash }))) + }) }) - }) + }, callback) }) }), diff --git a/src/core/components/repo.js b/src/core/components/repo.js index 5b0914aade..76d9262096 100644 --- a/src/core/components/repo.js +++ b/src/core/components/repo.js @@ -4,8 +4,6 @@ const promisify = require('promisify-es6') const repoVersion = require('ipfs-repo').repoVersion module.exports = function repo (self) { - const gc = require('./gc')(self) - return { init: (bits, empty, callback) => { // 1. 
check if repo already exists @@ -40,9 +38,7 @@ module.exports = function repo (self) { }) }), - gc: promisify((callback) => { - gc(callback) - }), + gc: require('./gc')(self), stat: promisify((options, callback) => { if (typeof options === 'function') { diff --git a/src/core/index.js b/src/core/index.js index 9ab064a607..82a92854e5 100644 --- a/src/core/index.js +++ b/src/core/index.js @@ -20,12 +20,14 @@ const EventEmitter = require('events') const config = require('./config') const boot = require('./boot') const components = require('./components') +const GCLock = require('./components/gc-lock') // replaced by repo-browser when running in the browser const defaultRepo = require('./runtime/repo-nodejs') const preload = require('./preload') const mfsPreload = require('./mfs-preload') const ipldOptions = require('./runtime/ipld-nodejs') + /** * @typedef { import("./ipns/index") } IPNS */ @@ -89,6 +91,7 @@ class IPFS extends EventEmitter { this._ipns = undefined // eslint-disable-next-line no-console this._print = this._options.silent ? this.log : console.log + this._gcLock = new GCLock() // IPFS Core exposed components // - for booting up a node From 43b17200c6ab53ac755d2b812cbdee542e778d90 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 17:18:43 -0400 Subject: [PATCH 15/56] test: add gc locking tests --- package.json | 1 + src/core/components/block.js | 4 +- .../files-regular/add-pull-stream.js | 2 +- src/core/components/gc-lock.js | 46 +++- src/core/components/gc.js | 21 +- src/core/components/pin.js | 19 +- test/core/gc.spec.js | 238 ++++++++++++++++++ 7 files changed, 300 insertions(+), 31 deletions(-) create mode 100644 test/core/gc.spec.js diff --git a/package.json b/package.json index 0459207866..ece6616503 100644 --- a/package.json +++ b/package.json @@ -146,6 +146,7 @@ "multihashes": "~0.4.14", "multihashing-async": "~0.6.0", "node-fetch": "^2.3.0", + "p-event": "^4.1.0", "peer-book": "~0.9.0", "peer-id": "~0.12.3", "peer-info": "~0.15.0", diff --git a/src/core/components/block.js b/src/core/components/block.js index f5a9f610ed..2a7196bbc3 100644 --- a/src/core/components/block.js +++ b/src/core/components/block.js @@ -81,7 +81,7 @@ module.exports = function block (self) { cb(null, new Block(block, cid)) }) }, - (block, cb) => self._gcLock.writeLock((_cb) => { + (block, cb) => self._gcLock.readLock((_cb) => { self._blockService.put(block, (err) => { if (err) { return _cb(err) @@ -103,7 +103,7 @@ module.exports = function block (self) { return setImmediate(() => callback(errCode(err, 'ERR_INVALID_CID'))) } - self._gcLock.readLock((cb) => self._blockService.delete(cid, cb), callback) + self._gcLock.writeLock((cb) => self._blockService.delete(cid, cb), callback) }), stat: promisify((cid, options, callback) => { if (typeof options === 'function') { diff --git a/src/core/components/files-regular/add-pull-stream.js b/src/core/components/files-regular/add-pull-stream.js index d9281d5ac2..7a77d4685d 100644 --- a/src/core/components/files-regular/add-pull-stream.js +++ b/src/core/components/files-regular/add-pull-stream.js @@ -116,7 +116,7 @@ function pinFile (file, self, opts, cb) { const isRootDir = !file.path.includes('/') const shouldPin = pin && isRootDir && !opts.onlyHash && !opts.hashAlg if (shouldPin) { - return self.pin.add(file.hash, { preload: false }, err => cb(err, file)) + return self.pin.add(file.hash, { preload: false, lock: false }, err => cb(err, file)) } else { cb(null, file) } diff --git a/src/core/components/gc-lock.js 
b/src/core/components/gc-lock.js index 0d3b5ceca5..4a427c6d13 100644 --- a/src/core/components/gc-lock.js +++ b/src/core/components/gc-lock.js @@ -2,11 +2,14 @@ const mortice = require('mortice') const pull = require('pull-stream') -const log = require('debug')('ipfs:repo:gc:lock') +const EventEmitter = require('events') +const log = require('debug')('ipfs:gc:lock') -class GCLock { +class GCLock extends EventEmitter { constructor () { + super() this.mutex = mortice() + this.lockId = 0 } readLock (lockedFn, cb) { @@ -18,16 +21,28 @@ class GCLock { } lock (type, lockedFn, cb) { - log(`${type} requested`) + if (typeof lockedFn !== 'function') { + throw new Error(`first argument to ${type} must be a function`) + } + if (typeof cb !== 'function') { + throw new Error(`second argument to ${type} must be a callback function`) + } + + const lockId = this.lockId++ + log(`[${lockId}] ${type} requested`) + this.emit(`${type} request`, lockId) const locked = () => new Promise((resolve, reject) => { - log(`${type} started`) - lockedFn((err, res) => err ? reject(err) : resolve(res)) + this.emit(`${type} start`, lockId) + log(`[${lockId}] ${type} started`) + lockedFn((err, res) => { + this.emit(`${type} release`, lockId) + log(`[${lockId}] ${type} released`) + err ? reject(err) : resolve(res) + }) }) const lock = this.mutex[type](locked) - return lock.then(res => cb(null, res)).catch(cb).finally(() => { - log(`${type} released`) - }) + return lock.then(res => cb(null, res)).catch(cb) } pullReadLock (lockedPullFn) { @@ -39,7 +54,7 @@ class GCLock { } pullLock (type, lockedPullFn) { - const pullLocker = new PullLocker(this.mutex, type) + const pullLocker = new PullLocker(this, this.mutex, type, this.lockId++) return pull( pullLocker.take(), @@ -50,9 +65,11 @@ class GCLock { } class PullLocker { - constructor (mutex, type) { + constructor (emitter, mutex, type, lockId) { + this.emitter = emitter this.mutex = mutex this.type = type + this.lockId = lockId // This Promise resolves when the mutex gives us permission to start // running the locked piece of code @@ -65,7 +82,8 @@ class PullLocker { locked () { return new Promise((resolve) => { this.releaseLock = resolve - log(`${this.type} (pull) started`) + log(`[${this.lockId}] ${this.type} (pull) started`) + this.emitter.emit(`${this.type} start`, this.lockId) // The locked piece of code is ready to start, so resolve the // this.lockReady Promise (created in the constructor) @@ -79,7 +97,8 @@ class PullLocker { return pull( pull.asyncMap((i, cb) => { if (!this.lock) { - log(`${this.type} (pull) requested`) + log(`[${this.lockId}] ${this.type} (pull) requested`) + this.emitter.emit(`${this.type} request`, this.lockId) // Request the lock this.lock = this.mutex[this.type](() => this.locked()) } @@ -93,7 +112,8 @@ class PullLocker { // Releases the lock release () { return pull.through(null, () => { - log(`${this.type} (pull) released`) + log(`[${this.lockId}] ${this.type} (pull) released`) + this.emitter.emit(`${this.type} release`, this.lockId) this.releaseLock() }) } diff --git a/src/core/components/gc.js b/src/core/components/gc.js index c2abe39c5a..21b54e6cf2 100644 --- a/src/core/components/gc.js +++ b/src/core/components/gc.js @@ -6,6 +6,7 @@ const base32 = require('base32.js') const parallel = require('async/parallel') const mapLimit = require('async/mapLimit') const { Key } = require('interface-datastore') +const log = require('debug')('ipfs:gc') // Limit on the number of parallel block remove operations const BLOCK_RM_CONCURRENCY = 256 @@ -16,7 +17,7 @@ 
const MFS_ROOT_DS_KEY = new Key('/local/filesroot') module.exports = function gc (self) { return promisify(async (callback) => { const start = Date.now() - self.log(`GC: Creating set of marked blocks`) + log(`Creating set of marked blocks`) self._gcLock.writeLock((lockCb) => { parallel([ @@ -26,14 +27,14 @@ module.exports = function gc (self) { (cb) => createMarkedSet(self, cb) ], (err, [blocks, markedSet]) => { if (err) { - self.log(`GC: Error - ${err.message}`) + log(`Error - ${err.message}`) return lockCb(err) } // Delete blocks that are not being used deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { if (err) { - self.log(`GC: Error - ${err.message}`) + log(`Error - ${err.message}`) return lockCb(err) } lockCb(null, res) @@ -55,7 +56,7 @@ function createMarkedSet (ipfs, callback) { if (err) { return cb(new Error(`Could not list pinned blocks: ${err.message}`)) } - ipfs.log(`GC: Found ${pins.length} pinned blocks`) + log(`Found ${pins.length} pinned blocks`) cb(null, pins.map(p => new CID(p.hash))) }), @@ -63,7 +64,7 @@ function createMarkedSet (ipfs, callback) { (cb) => ipfs._repo.datastore.get(PIN_DS_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { - ipfs.log(`GC: No pinned blocks`) + log(`No pinned blocks`) return cb(null, []) } return cb(new Error(`Could not get pin sets root from datastore: ${err.message}`)) @@ -86,7 +87,7 @@ function createMarkedSet (ipfs, callback) { (cb) => ipfs._repo.datastore.get(MFS_ROOT_DS_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { - ipfs.log(`GC: No blocks in MFS`) + log(`No blocks in MFS`) return cb(null, []) } return cb(new Error(`Could not get MFS root from datastore: ${err.message}`)) @@ -110,7 +111,7 @@ function getDescendants (ipfs, cid, callback) { if (err) { return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) } - ipfs.log(`GC: Found ${refs.length} MFS blocks`) + log(`Found ${refs.length} MFS blocks`) callback(null, [cid, ...refs.map(r => new CID(r.ref))]) }) } @@ -132,9 +133,9 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { } } - const msg = `GC: Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. ` + + const msg = `Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. ` + `Deleting ${unreferenced.length} blocks.` - ipfs.log(msg) + log(msg) mapLimit(unreferenced, BLOCK_RM_CONCURRENCY, (cid, cb) => { // Delete blocks from blockstore @@ -146,7 +147,7 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { cb(null, res) }) }, (_, delRes) => { - ipfs.log(`GC: Complete (${Date.now() - start}ms)`) + log(`Complete (${Date.now() - start}ms)`) callback(null, res.concat(delRes)) }) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index e8cf41b502..e5ab3a6b33 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -203,7 +203,7 @@ module.exports = (self) => { resolvePath(self.object, paths, (err, mhs) => { if (err) { return callback(err) } - self._gcLock.readLock((lockCb) => { + const pin = (pinComplete) => { // verify that each hash can be pinned map(mhs, (multihash, cb) => { const cid = new CID(multihash) @@ -240,7 +240,7 @@ module.exports = (self) => { }) } }, (err, results) => { - if (err) { return lockCb(err) } + if (err) { return pinComplete(err) } // update the pin sets in memory const pinset = recursive ? 
recursivePins : directPins @@ -248,11 +248,20 @@ module.exports = (self) => { // persist updated pin sets to datastore flushPins((err, root) => { - if (err) { return lockCb(err) } - lockCb(null, results.map(hash => ({ hash }))) + if (err) { return pinComplete(err) } + pinComplete(null, results.map(hash => ({ hash }))) }) }) - }, callback) + } + + // When adding a file, we take a lock that gets released after pinning + // is complete, so don't take a second lock here + const lock = options.lock !== false + if (lock) { + self._gcLock.readLock(pin, callback) + } else { + pin(callback) + } }) }), diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js new file mode 100644 index 0000000000..8b7a48645b --- /dev/null +++ b/test/core/gc.spec.js @@ -0,0 +1,238 @@ +/* eslint max-nested-callbacks: ["error", 8] */ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +const dirtyChai = require('dirty-chai') +const expect = chai.expect +chai.use(dirtyChai) + +const fs = require('fs') + +const IPFS = require('../../src/core') +const createTempRepo = require('../utils/create-repo-nodejs') +const pEvent = require('p-event') + +describe('gc', function () { + const fixtures = [ + 'test/fixtures/planets/mercury/wiki.md', + 'test/fixtures/planets/solar-system.md' + ].map(path => ({ + path, + content: fs.readFileSync(path) + })) + + let ipfs + let repo + + before(function (done) { + this.timeout(20 * 1000) + repo = createTempRepo() + ipfs = new IPFS({ + repo, + config: { + Bootstrap: [] + } + }) + ipfs.on('ready', done) + }) + + after(function (done) { + this.timeout(60 * 1000) + ipfs.stop(done) + }) + + after((done) => repo.teardown(done)) + + const blockAddTests = [{ + name: 'add', + add1: () => ipfs.add(fixtures[0], { pin: false }), + add2: () => ipfs.add(fixtures[1], { pin: false }), + resToCid: (res) => res[0].hash + }, { + name: 'object put', + add1: () => ipfs.object.put({ Data: 'obj put 1', Links: [] }), + add2: () => ipfs.object.put({ Data: 'obj put 2', Links: [] }), + resToCid: (res) => res.toString() + }, { + name: 'block put', + add1: () => ipfs.block.put(Buffer.from('block put 1'), null), + add2: () => ipfs.block.put(Buffer.from('block put 2'), null), + resToCid: (res) => res.cid.toString() + }] + + describe('locks', function () { + for (const test of blockAddTests) { + // eslint-disable-next-line no-loop-func + it(`garbage collection should wait for pending ${test.name} to finish`, async () => { + // Add blocks to IPFS + // Note: add operation will take a read lock + const addLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const add1 = test.add1() + + // Once add lock has been requested, start GC + await addLockRequested + // Note: GC will take a write lock + const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gc = ipfs.repo.gc() + + // Once GC has started, start second add + await gcStarted + const add2 = test.add2() + + const deleted = (await gc).map(i => i.cid) + const add1Res = test.resToCid(await add1) + const add2Res = test.resToCid(await add2) + + // Should have garbage collected blocks from first add, because GC should + // have waited for first add to finish + expect(deleted).includes(add1Res) + + // Should not have garbage collected blocks from second add, because + // second add should have waited for GC to finish + expect(deleted).not.includes(add2Res) + }) + } + + it('garbage collection should wait for pending add + pin to finish', async () => { + // Add blocks to IPFS + // Note: add operation will take a read lock + const addLockRequested 
= pEvent(ipfs._gcLock, 'readLock request') + const add1 = ipfs.add(fixtures[0], { pin: true }) + + // Once add lock has been requested, start GC + await addLockRequested + // Note: GC will take a write lock + const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gc = ipfs.repo.gc() + + // Once GC has started, start second add + await gcStarted + const add2 = ipfs.add(fixtures[1], { pin: true }) + + const deleted = (await gc).map(i => i.cid) + const add1Res = (await add1)[0].hash + const add2Res = (await add2)[0].hash + + // Should not have garbage collected blocks from first add, because GC should + // have waited for first add + pin to finish (protected by pin) + expect(deleted).not.includes(add1Res) + + // Should not have garbage collected blocks from second add, because + // second add should have waited for GC to finish + expect(deleted).not.includes(add2Res) + }) + + it('garbage collection should wait for pending block rm to finish', async () => { + // Add two blocks so that we can remove them + const cid1 = (await ipfs.block.put(Buffer.from('block to rm 1'), null)).cid + const cid2 = (await ipfs.block.put(Buffer.from('block to rm 2'), null)).cid + + // Remove first block from IPFS + // Note: block rm will take a write lock + const rmLockRequested = pEvent(ipfs._gcLock, 'writeLock request') + const rm1 = ipfs.block.rm(cid1) + + // Once rm lock has been requested, start GC + await rmLockRequested + // Note: GC will take a write lock + const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gc = ipfs.repo.gc() + + // Once GC has started, start second rm + await gcStarted + const rm2 = ipfs.block.rm(cid2) + + const deleted = (await gc).map(i => i.cid) + await rm1 + + // Second rm should fail because GC has already removed that block + try { + await rm2 + } catch (err) { + expect(err.code).eql('ERR_DB_DELETE_FAILED') + } + + // Confirm the second block has been removed + const localRefs = (await ipfs.refs.local()).map(r => r.ref) + expect(localRefs).not.includes(cid2.toString()) + + // Should not have garbage collected block from first block put, because + // GC should have waited for first rm (removing first block put) to finish + expect(deleted).not.includes(cid1.toString()) + + // Should have garbage collected block from second block put, because GC + // should have completed before second rm (removing second block put) + expect(deleted).includes(cid2.toString()) + }) + + it('garbage collection should wait for pending pin add to finish', async () => { + // Add two blocks so that we can pin them + const cid1 = (await ipfs.block.put(Buffer.from('block to pin add 1'), null)).cid + const cid2 = (await ipfs.block.put(Buffer.from('block to pin add 2'), null)).cid + + // Pin first block + // Note: pin add will take a read lock + const pinLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const pin1 = ipfs.pin.add(cid1) + + // Once pin lock has been requested, start GC + await pinLockRequested + const gc = ipfs.repo.gc() + const deleted = (await gc).map(i => i.cid) + await pin1 + + // TODO: Adding pin for removed block never returns, which means the lock + // never gets released + // const pin2 = ipfs.pin.add(cid2) + + // Confirm the second block has been removed + const localRefs = (await ipfs.refs.local()).map(r => r.ref) + expect(localRefs).not.includes(cid2.toString()) + + // Should not have garbage collected block from first block put, because + // GC should have waited for pin (protecting first block put) to finish + 
expect(deleted).not.includes(cid1.toString()) + + // Should have garbage collected block from second block put, because GC + // should have completed before second pin + expect(deleted).includes(cid2.toString()) + }) + + it('garbage collection should wait for pending pin rm to finish', async () => { + // Add two blocks so that we can pin them + const cid1 = (await ipfs.block.put(Buffer.from('block to pin rm 1'), null)).cid + const cid2 = (await ipfs.block.put(Buffer.from('block to pin rm 2'), null)).cid + + // Pin blocks + await Promise.all([ipfs.pin.add(cid1), ipfs.pin.add(cid2)]) + + // Unpin first block + // Note: pin rm will take a read lock + const pinLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const pinRm1 = ipfs.pin.rm(cid1) + + // Once pin lock has been requested, start GC + await pinLockRequested + // Note: GC will take a write lock + const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gc = ipfs.repo.gc() + + // Once GC has started, start second pin rm + await gcStarted + const pinRm2 = ipfs.pin.rm(cid2) + + const deleted = (await gc).map(i => i.cid) + await pinRm1 + await pinRm2 + + // Should have garbage collected block from first block put, because + // GC should have waited for pin rm (unpinning first block put) to finish + expect(deleted).includes(cid1.toString()) + + // Should not have garbage collected block from second block put, because + // GC should have completed before second block was unpinned + expect(deleted).not.includes(cid2.toString()) + }) + }) +}) From d970a3275d44856a54064459e957a213450d5259 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 18:24:11 -0400 Subject: [PATCH 16/56] refactor: rebase --- .../files-regular/add-pull-stream.js | 2 - .../files-regular/refs-pull-stream.js | 80 ----------------- src/http/api/resources/files-regular.js | 86 ------------------- 3 files changed, 168 deletions(-) diff --git a/src/core/components/files-regular/add-pull-stream.js b/src/core/components/files-regular/add-pull-stream.js index 7a77d4685d..47a7ef8517 100644 --- a/src/core/components/files-regular/add-pull-stream.js +++ b/src/core/components/files-regular/add-pull-stream.js @@ -156,8 +156,6 @@ module.exports = function (self) { } opts.progress = progress - - return self._gcLock.pullReadLock(() => pull( pullMap(content => normalizeContent(content, opts)), pullFlatten(), diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js index 8447192ae0..aec4ba8a6d 100644 --- a/src/core/components/files-regular/refs-pull-stream.js +++ b/src/core/components/files-regular/refs-pull-stream.js @@ -178,83 +178,3 @@ function getNodeLinks (node, path = '') { } return links } - -// Do a depth first search of the DAG, starting from the given root cid -function objectStream (ipfs, rootCid, maxDepth, isUnique) { - const uniques = new Set() - - const root = { node: { cid: rootCid }, depth: 0 } - const traverseLevel = (obj) => { - const { node, depth } = obj - - // Check the depth - const nextLevelDepth = depth + 1 - if (nextLevelDepth > maxDepth) { - return pull.empty() - } - - // If unique option is enabled, check if the CID has been seen before. - // Note we need to do this here rather than before adding to the stream - // so that the unique check happens in the order that items are examined - // in the DAG. 
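// As an aside on the ordering point above: checking uniqueness while
// traversing keeps the duplicate filter aligned with depth-first visit
// order, which checking before emission would not. A minimal synchronous
// sketch of the idea, using illustrative names that are not part of this
// patch:
function dfsUnique (rootCid, getLinks, maxDepth) {
  const seen = new Set()
  const visited = []
  const walk = (cid, depth) => {
    // the unique check happens here, in visit order, as in objectStream
    if (depth > maxDepth || seen.has(cid.toString())) return
    seen.add(cid.toString())
    visited.push(cid)
    for (const link of getLinks(cid)) walk(link.cid, depth + 1)
  }
  walk(rootCid, 0)
  return visited
}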
- if (isUnique) { - if (uniques.has(node.cid.toString())) { - // Mark this object as a duplicate so we can filter it out later - obj.isDuplicate = true - return pull.empty() - } - uniques.add(node.cid.toString()) - } - - const deferred = pullDefer.source() - - // Get this object's links - getLinks(ipfs, node.cid, (err, links) => { - if (err) { - if (err.code === 'ERR_NOT_FOUND') { - err.message = `Could not find object with CID: ${node.cid}` - } - return deferred.resolve(pull.error(err)) - } - - // Add to the stream each link, parent and the new depth - const vals = links.map(link => ({ - parent: node, - node: link, - depth: nextLevelDepth - })) - - deferred.resolve(pull.values(vals)) - }) - - return deferred - } - - return pullTraverse.depthFirst(root, traverseLevel) -} - -// Fetch a node from IPLD then get all its links -function getLinks (ipfs, cid, callback) { - ipfs._ipld.get(new CID(cid), (err, node) => { - if (err) { - return callback(err) - } - callback(null, node.value.links || getNodeLinks(node.value)) - }) -} - -// Recursively search the node for CIDs -function getNodeLinks (node, path = '') { - let links = [] - for (const [name, value] of Object.entries(node)) { - if (CID.isCID(value)) { - links.push({ - name: path + name, - cid: value - }) - } else if (typeof value === 'object') { - links = links.concat(getNodeLinks(value, path + name + '/')) - } - } - return links -} diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js index 1419abf247..dbd1f71a53 100644 --- a/src/http/api/resources/files-regular.js +++ b/src/http/api/resources/files-regular.js @@ -416,89 +416,3 @@ function sendRefsReplyStream (request, h, desc, source) { .header('content-type', 'application/json') .header('Trailer', 'X-Stream-Error') } - -exports.refs = { - validate: { - query: Joi.object().keys({ - recursive: Joi.boolean().default(false), - format: Joi.string().default(Format.default), - edges: Joi.boolean().default(false), - unique: Joi.boolean().default(false), - 'max-depth': Joi.number().integer().min(-1) - }).unknown() - }, - - // uses common parseKey method that returns a `key` - parseArgs: exports.parseKey, - - // main route handler which is called after the above `parseArgs`, but only if the args were valid - handler (request, h) { - const { ipfs } = request.server.app - const { key } = request.pre.args - - const recursive = request.query.recursive - const format = request.query.format - const edges = request.query.edges - const unique = request.query.unique - const maxDepth = request.query['max-depth'] - - const source = ipfs.refsPullStream(key, { recursive, format, edges, unique, maxDepth }) - return sendRefsReplyStream(request, h, `refs for ${key}`, source) - } -} - -exports.refs.local = { - // main route handler - handler (request, h) { - const { ipfs } = request.server.app - const source = ipfs.refs.localPullStream() - return sendRefsReplyStream(request, h, 'local refs', source) - } -} - -function sendRefsReplyStream (request, h, desc, source) { - const replyStream = pushable() - const aborter = abortable() - - const stream = toStream.source(pull( - replyStream, - aborter, - ndjson.serialize() - )) - - // const stream = toStream.source(replyStream.source) - // hapi is not very clever and throws if no - // - _read method - // - _readableState object - // are there :( - if (!stream._read) { - stream._read = () => {} - stream._readableState = {} - stream.unpipe = () => {} - } - - pull( - source, - pull.drain( - (ref) => replyStream.push({ Ref: ref.ref, Err: 
ref.err }), - (err) => { - if (err) { - request.raw.res.addTrailers({ - 'X-Stream-Error': JSON.stringify({ - Message: `Failed to get ${desc}: ${err.message || ''}`, - Code: 0 - }) - }) - return aborter.abort() - } - - replyStream.end() - } - ) - ) - - return h.response(stream) - .header('x-chunked-output', '1') - .header('content-type', 'application/json') - .header('Trailer', 'X-Stream-Error') -} From 3ec57d9d53b3ad6ea5f8cccbe1d87f323aac16f5 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 18:43:12 -0400 Subject: [PATCH 17/56] fix: gc use uppercase dag.Links --- src/core/components/gc.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/components/gc.js b/src/core/components/gc.js index 21b54e6cf2..f04d80bcda 100644 --- a/src/core/components/gc.js +++ b/src/core/components/gc.js @@ -79,7 +79,7 @@ function createMarkedSet (ipfs, callback) { // The pinner stores an object that has two links to pin sets: // 1. The directly pinned CIDs // 2. The recursively pinned CIDs - cb(null, [cid, ...obj.value.links.map(l => l.cid)]) + cb(null, [cid, ...obj.value.Links.map(l => l.Hash)]) }) }), From 255dee3d3a4a8e86af2239184f3a5a05d8ef20a1 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 18:44:15 -0400 Subject: [PATCH 18/56] chore: update package.json deps --- package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index ece6616503..ddf79813ed 100644 --- a/package.json +++ b/package.json @@ -65,7 +65,7 @@ "array-shuffle": "^1.0.1", "async": "^2.6.1", "async-iterator-all": "^1.0.0", - "async-iterator-to-pull-stream": "^1.1.0", + "async-iterator-to-pull-stream": "^1.3.0", "async-iterator-to-stream": "^1.1.0", "base32.js": "~0.1.0", "bignumber.js": "^9.0.0", @@ -139,6 +139,7 @@ "merge-options": "^1.0.1", "mime-types": "^2.1.21", "mkdirp": "~0.5.1", + "mortice": "dirkmc/mortice#fix/read-then-write", "multiaddr": "^6.1.0", "multiaddr-to-uri": "^5.0.0", "multibase": "~0.6.0", From c8d1f08a1122adc85fca4525975de8c1ff3cfcc7 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 18:48:33 -0400 Subject: [PATCH 19/56] chore: rebase --- test/utils/ipfs-exec.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/utils/ipfs-exec.js b/test/utils/ipfs-exec.js index 7094a1639a..2e764632c5 100644 --- a/test/utils/ipfs-exec.js +++ b/test/utils/ipfs-exec.js @@ -7,7 +7,6 @@ const expect = chai.expect chai.use(dirtyChai) const path = require('path') const _ = require('lodash') -const yargs = require('yargs') // This is our new test utility to easily check and execute ipfs cli commands. 
// @@ -34,10 +33,11 @@ module.exports = (repoPath, opts) => { })) const execute = (exec, args) => { - // Adding '--' at the front of the command allows us to parse commands that - // have a parameter with spaces in it, eg - // ipfs refs --format="<src> -> <dst>" - const cp = exec(yargs('-- ' + args[0]).argv._) + if (args.length === 1) { + args = args[0].split(' ') + } + + const cp = exec(args) const res = cp.then((res) => { // We can't escape the os.tmpdir warning due to: // https://github.com/shelljs/shelljs/blob/master/src/tempdir.js#L43 From 568a1d9d563c747981d2e504f82a1fb69638c0b7 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 22 May 2019 10:36:27 -0400 Subject: [PATCH 20/56] chore: add joi to package.json --- src/http/api/resources/repo.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/http/api/resources/repo.js b/src/http/api/resources/repo.js index 9ca4734a1c..ce59dc3123 100644 --- a/src/http/api/resources/repo.js +++ b/src/http/api/resources/repo.js @@ -1,6 +1,6 @@ 'use strict' -const Joi = require('joi') +const Joi = require('@hapi/joi') exports.gc = { validate: { From c0007afc94ac95d1d4b677af05e233417d44d5fe Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 22 May 2019 16:04:16 -0400 Subject: [PATCH 21/56] refactor: pin/gc common code --- package.json | 2 +- src/core/components/pin.js | 341 +++------------------ src/core/components/{ => pin}/gc-lock.js | 0 src/core/components/{ => pin}/gc.js | 32 +- src/core/components/pin/pin-manager.js | 301 ++++++++++++++++++ src/core/components/{ => pin}/pin-set.js | 2 + src/core/components/{ => pin}/pin.proto.js | 0 src/core/components/repo.js | 2 +- test/core/pin-set.js | 2 +- 9 files changed, 357 insertions(+), 325 deletions(-) rename src/core/components/{ => pin}/gc-lock.js (100%) rename src/core/components/{ => pin}/gc.js (78%) create mode 100644 src/core/components/pin/pin-manager.js rename src/core/components/{ => pin}/pin-set.js (99%) rename src/core/components/{ => pin}/pin.proto.js (100%) diff --git a/package.json b/package.json index ddf79813ed..ccb1470cd4 100644 --- a/package.json +++ b/package.json @@ -95,7 +95,7 @@ "ipfs-bitswap": "~0.25.1", "ipfs-block": "~0.8.1", "ipfs-block-service": "~0.15.2", - "ipfs-http-client": "^33.1.0", + "ipfs-http-client": "ipfs/js-ipfs-http-client#feat/gc", "ipfs-http-response": "~0.3.1", "ipfs-mfs": "~0.12.0", "ipfs-multipart": "~0.1.1", diff --git a/src/core/components/pin.js b/src/core/components/pin.js index e5ab3a6b33..4b77269343 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -2,192 +2,25 @@ 'use strict' const promisify = require('promisify-es6') -const { DAGNode, DAGLink } = require('ipld-dag-pb') const CID = require('cids') const map = require('async/map') const mapSeries = require('async/mapSeries') -const series = require('async/series') -const parallel = require('async/parallel') -const eachLimit = require('async/eachLimit') const waterfall = require('async/waterfall') -const detectLimit = require('async/detectLimit') const setImmediate = require('async/setImmediate') -const queue = require('async/queue') -const { Key } = require('interface-datastore') const errCode = require('err-code') const multibase = require('multibase') -const multicodec = require('multicodec') -const createPinSet = require('./pin-set') const { resolvePath } = require('../utils') - -// arbitrary limit to the number of concurrent dag operations -const concurrencyLimit = 300 -const pinDataStoreKey = new Key('/local/pins') +const PinManager = 
require('./pin/pin-manager') +const PinTypes = PinManager.PinTypes function toB58String (hash) { return new CID(hash).toBaseEncodedString() } -function invalidPinTypeErr (type) { - const errMsg = `Invalid type '${type}', must be one of {direct, indirect, recursive, all}` - return errCode(new Error(errMsg), 'ERR_INVALID_PIN_TYPE') -} - module.exports = (self) => { - const repo = self._repo const dag = self.dag - const pinset = createPinSet(dag) - const types = { - direct: 'direct', - recursive: 'recursive', - indirect: 'indirect', - all: 'all' - } - - let directPins = new Set() - let recursivePins = new Set() - - const directKeys = () => - Array.from(directPins).map(key => new CID(key).buffer) - const recursiveKeys = () => - Array.from(recursivePins).map(key => new CID(key).buffer) - - function walkDag ({ cid, preload = false, onCid = () => {} }, cb) { - const q = queue(function ({ cid }, done) { - dag.get(cid, { preload }, function (err, result) { - if (err) { - return done(err) - } - - onCid(cid) - - if (result.value.Links) { - q.push(result.value.Links.map(link => ({ - cid: link.Hash - }))) - } - - done() - }) - }, concurrencyLimit) - q.drain = () => { - cb() - } - q.error = (err) => { - q.kill() - cb(err) - } - q.push({ cid }) - } - - function getIndirectKeys ({ preload }, callback) { - const indirectKeys = new Set() - eachLimit(recursiveKeys(), concurrencyLimit, (multihash, cb) => { - // load every hash in the graph - walkDag({ - cid: new CID(multihash), - preload: preload || false, - onCid: (cid) => { - cid = cid.toString() - - // recursive pins pre-empt indirect pins - if (!recursivePins.has(cid)) { - indirectKeys.add(cid) - } - } - }, cb) - }, (err) => { - if (err) { return callback(err) } - callback(null, Array.from(indirectKeys)) - }) - } - - // Encode and write pin key sets to the datastore: - // a DAGLink for each of the recursive and direct pinsets - // a DAGNode holding those as DAGLinks, a kind of root pin - function flushPins (callback) { - let dLink, rLink, root - series([ - // create a DAGLink to the node with direct pins - cb => waterfall([ - cb => pinset.storeSet(directKeys(), cb), - ({ node, cid }, cb) => { - try { - cb(null, new DAGLink(types.direct, node.size, cid)) - } catch (err) { - cb(err) - } - }, - (link, cb) => { dLink = link; cb(null) } - ], cb), - - // create a DAGLink to the node with recursive pins - cb => waterfall([ - cb => pinset.storeSet(recursiveKeys(), cb), - ({ node, cid }, cb) => { - try { - cb(null, new DAGLink(types.recursive, node.size, cid)) - } catch (err) { - cb(err) - } - }, - (link, cb) => { rLink = link; cb(null) } - ], cb), - - // the pin-set nodes link to a special 'empty' node, so make sure it exists - cb => { - let empty - - try { - empty = DAGNode.create(Buffer.alloc(0)) - } catch (err) { - return cb(err) - } - - dag.put(empty, { - version: 0, - format: multicodec.DAG_PB, - hashAlg: multicodec.SHA2_256, - preload: false - }, cb) - }, - - // create a root node with DAGLinks to the direct and recursive DAGs - cb => { - let node - - try { - node = DAGNode.create(Buffer.alloc(0), [dLink, rLink]) - } catch (err) { - return cb(err) - } - - root = node - dag.put(root, { - version: 0, - format: multicodec.DAG_PB, - hashAlg: multicodec.SHA2_256, - preload: false - }, (err, cid) => { - if (!err) { - root.multihash = cid.buffer - } - cb(err) - }) - }, - - // hack for CLI tests - cb => repo.closed ? 
repo.open(cb) : cb(null, null), - - // save root to datastore under a consistent key - cb => repo.datastore.put(pinDataStoreKey, root.multihash, cb) - ], (err, res) => { - if (err) { return callback(err) } - self.log(`Flushed pins with root: ${root}`) - return callback(null, root) - }) - } + const pinManager = new PinManager(self._repo, dag, self.log) const pin = { add: promisify((paths, options, callback) => { @@ -210,24 +43,24 @@ module.exports = (self) => { const key = cid.toBaseEncodedString() if (recursive) { - if (recursivePins.has(key)) { + if (pinManager.recursivePins.has(key)) { // it's already pinned recursively return cb(null, key) } // entire graph of nested links should be pinned, // so make sure we have all the objects - walkDag({ - dag, - cid, - preload: options.preload - }, (err) => cb(err, key)) + pinManager.fetchCompleteDag(key, { preload: options.preload }, (err) => { + if (err) { return cb(err) } + // found all objects, we can add the pin + return cb(null, key) + }) } else { - if (recursivePins.has(key)) { + if (pinManager.recursivePins.has(key)) { // recursive supersedes direct, can't have both return cb(new Error(`${key} already pinned recursively`)) } - if (directPins.has(key)) { + if (pinManager.directPins.has(key)) { // already directly pinned return cb(null, key) } @@ -243,11 +76,11 @@ module.exports = (self) => { if (err) { return pinComplete(err) } // update the pin sets in memory - const pinset = recursive ? recursivePins : directPins + const pinset = recursive ? pinManager.recursivePins : pinManager.directPins results.forEach(key => pinset.add(key)) // persist updated pin sets to datastore - flushPins((err, root) => { + pinManager.flushPins((err, root) => { if (err) { return pinComplete(err) } pinComplete(null, results.map(hash => ({ hash }))) }) @@ -286,7 +119,7 @@ module.exports = (self) => { self._gcLock.readLock((lockCb) => { // verify that each hash can be unpinned map(mhs, (multihash, cb) => { - pin._isPinnedWithType(multihash, types.all, (err, res) => { + pinManager.isPinnedWithType(multihash, PinTypes.all, (err, res) => { if (err) { return cb(err) } const { pinned, reason } = res const key = toB58String(multihash) @@ -295,13 +128,13 @@ module.exports = (self) => { } switch (reason) { - case (types.recursive): + case (PinTypes.recursive): if (recursive) { return cb(null, key) } else { return cb(new Error(`${key} is pinned recursively`)) } - case (types.direct): + case (PinTypes.direct): return cb(null, key) default: return cb(new Error( @@ -314,15 +147,15 @@ module.exports = (self) => { // update the pin sets in memory results.forEach(key => { - if (recursive && recursivePins.has(key)) { - recursivePins.delete(key) + if (recursive && pinManager.recursivePins.has(key)) { + pinManager.recursivePins.delete(key) } else { - directPins.delete(key) + pinManager.directPins.delete(key) } }) // persist updated pin sets to datastore - flushPins((err, root) => { + pinManager.flushPins((err, root) => { if (err) { return lockCb(err) } self.log(`Removed pins: ${results}`) lockCb(null, results.map(hash => ({ hash }))) @@ -333,7 +166,7 @@ module.exports = (self) => { }), ls: promisify((paths, options, callback) => { - let type = types.all + let type = PinTypes.all if (typeof paths === 'function') { callback = paths options = {} @@ -350,27 +183,28 @@ module.exports = (self) => { options = options || {} if (options.type) { - if (typeof options.type !== 'string') { - return setImmediate(() => callback(invalidPinTypeErr(options.type))) + type = options.type + if (typeof 
options.type === 'string') { + type = options.type.toLowerCase() + } + const err = PinManager.checkPinType(type) + if (err) { + return setImmediate(() => callback(err)) } - type = options.type.toLowerCase() - } - if (!Object.keys(types).includes(type)) { - return setImmediate(() => callback(invalidPinTypeErr(type))) } if (paths) { // check the pinned state of specific hashes waterfall([ (cb) => resolvePath(self.object, paths, cb), - (hashes, cb) => mapSeries(hashes, (hash, done) => pin._isPinnedWithType(hash, type, done), cb), + (hashes, cb) => mapSeries(hashes, (hash, done) => pinManager.isPinnedWithType(hash, type, done), cb), (results, cb) => { results = results .filter(result => result.pinned) .map(({ key, reason }) => { switch (reason) { - case types.direct: - case types.recursive: + case PinTypes.direct: + case PinTypes.recursive: return { hash: key, type: reason @@ -378,7 +212,7 @@ module.exports = (self) => { default: return { hash: key, - type: `${types.indirect} through ${reason}` + type: `${PinTypes.indirect} through ${reason}` } } }) @@ -393,34 +227,37 @@ module.exports = (self) => { } else { // show all pinned items of type let pins = [] - if (type === types.direct || type === types.all) { + + if (type === PinTypes.direct || type === PinTypes.all) { pins = pins.concat( - Array.from(directPins).map(hash => ({ - type: types.direct, + Array.from(pinManager.directPins).map(hash => ({ + type: PinTypes.direct, hash })) ) } - if (type === types.recursive || type === types.all) { + + if (type === PinTypes.recursive || type === PinTypes.all) { pins = pins.concat( - Array.from(recursivePins).map(hash => ({ - type: types.recursive, + Array.from(pinManager.recursivePins).map(hash => ({ + type: PinTypes.recursive, hash })) ) } - if (type === types.indirect || type === types.all) { - getIndirectKeys(options, (err, indirects) => { + + if (type === PinTypes.indirect || type === PinTypes.all) { + pinManager.getIndirectKeys((err, indirects) => { if (err) { return callback(err) } pins = pins // if something is pinned both directly and indirectly, // report the indirect entry .filter(({ hash }) => !indirects.includes(hash) || - (indirects.includes(hash) && !directPins.has(hash)) + (indirects.includes(hash) && !pinManager.directPins.has(hash)) ) .concat(indirects.map(hash => ({ - type: types.indirect, + type: PinTypes.indirect, hash }))) return callback(null, pins) @@ -431,93 +268,9 @@ module.exports = (self) => { } }), - _isPinnedWithType: promisify((multihash, type, callback) => { - const key = toB58String(multihash) - const { recursive, direct, all } = types - - // recursive - if ((type === recursive || type === all) && recursivePins.has(key)) { - return callback(null, { - key, - pinned: true, - reason: recursive - }) - } - - if (type === recursive) { - return callback(null, { - key, - pinned: false - }) - } - - // direct - if ((type === direct || type === all) && directPins.has(key)) { - return callback(null, { - key, - pinned: true, - reason: direct - }) - } + _getInternalBlocks: (cb) => pinManager.getInternalBlocks(cb), - if (type === direct) { - return callback(null, { - key, - pinned: false - }) - } - - // indirect (default) - // check each recursive key to see if multihash is under it - // arbitrary limit, enables handling 1000s of pins. 
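// For reference, async's detectLimit (used just below) runs an async truth
// test over a collection with at most `limit` iteratee calls in flight and
// calls back with the first item found to pass, or undefined if none do. A
// small standalone sketch with illustrative values (a distinct local name
// avoids clashing with the surrounding code):
const detectLimitSketch = require('async/detectLimit')
detectLimitSketch([1, 2, 3, 4], 2, (n, cb) => {
  // truth test: is n even?
  setImmediate(() => cb(null, n % 2 === 0))
}, (err, firstEven) => {
  if (!err) console.log(firstEven) // => 2
})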
- detectLimit(recursiveKeys().map(key => new CID(key)), concurrencyLimit, (cid, cb) => { - waterfall([ - (done) => dag.get(cid, '', { preload: false }, done), - (result, done) => done(null, result.value), - (node, done) => pinset.hasDescendant(node, key, done) - ], cb) - }, (err, cid) => callback(err, { - key, - pinned: Boolean(cid), - reason: cid - })) - }), - - _load: promisify(callback => { - waterfall([ - // hack for CLI tests - (cb) => repo.closed ? repo.datastore.open(cb) : cb(null, null), - (_, cb) => repo.datastore.has(pinDataStoreKey, cb), - (has, cb) => has ? cb() : cb(new Error('No pins to load')), - (cb) => repo.datastore.get(pinDataStoreKey, cb), - (mh, cb) => { - dag.get(new CID(mh), '', { preload: false }, cb) - } - ], (err, pinRoot) => { - if (err) { - if (err.message === 'No pins to load') { - self.log('No pins to load') - return callback() - } else { - return callback(err) - } - } - - parallel([ - cb => pinset.loadSet(pinRoot.value, types.recursive, cb), - cb => pinset.loadSet(pinRoot.value, types.direct, cb) - ], (err, keys) => { - if (err) { return callback(err) } - const [rKeys, dKeys] = keys - - directPins = new Set(dKeys.map(toB58String)) - recursivePins = new Set(rKeys.map(toB58String)) - - self.log('Loaded pins from the datastore') - return callback(null) - }) - }) - }) + _load: (cb) => pinManager.load(cb) } return pin diff --git a/src/core/components/gc-lock.js b/src/core/components/pin/gc-lock.js similarity index 100% rename from src/core/components/gc-lock.js rename to src/core/components/pin/gc-lock.js diff --git a/src/core/components/gc.js b/src/core/components/pin/gc.js similarity index 78% rename from src/core/components/gc.js rename to src/core/components/pin/gc.js index f04d80bcda..4ff06db698 100644 --- a/src/core/components/gc.js +++ b/src/core/components/pin/gc.js @@ -10,7 +10,6 @@ const log = require('debug')('ipfs:gc') // Limit on the number of parallel block remove operations const BLOCK_RM_CONCURRENCY = 256 -const PIN_DS_KEY = new Key('/local/pins') const MFS_ROOT_DS_KEY = new Key('/local/filesroot') // Perform mark and sweep garbage collection @@ -47,10 +46,6 @@ module.exports = function gc (self) { // Get Set of CIDs of blocks to keep function createMarkedSet (ipfs, callback) { parallel([ - // "Empty block" used by the pinner - // TODO: This CID is replicated in pin.js - (cb) => cb(null, [new CID('QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n')]), - // All pins, direct and indirect (cb) => ipfs.pin.ls((err, pins) => { if (err) { @@ -61,31 +56,12 @@ function createMarkedSet (ipfs, callback) { }), // Blocks used internally by the pinner - (cb) => ipfs._repo.datastore.get(PIN_DS_KEY, (err, mh) => { - if (err) { - if (err.code === 'ERR_NOT_FOUND') { - log(`No pinned blocks`) - return cb(null, []) - } - return cb(new Error(`Could not get pin sets root from datastore: ${err.message}`)) - } - - const cid = new CID(mh) - ipfs.dag.get(cid, '', { preload: false }, (err, obj) => { - if (err) { - return cb(new Error(`Could not get pin sets from store: ${err.message}`)) - } - - // The pinner stores an object that has two links to pin sets: - // 1. The directly pinned CIDs - // 2. 
The recursively pinned CIDs - cb(null, [cid, ...obj.value.Links.map(l => l.Hash)]) - }) - }), + (cb) => ipfs.pin._getInternalBlocks(cb), // The MFS root and all its descendants - (cb) => ipfs._repo.datastore.get(MFS_ROOT_DS_KEY, (err, mh) => { + (cb) => ipfs._repo.root.get(MFS_ROOT_DS_KEY, (err, mh) => { if (err) { + console.error(err) if (err.code === 'ERR_NOT_FOUND') { log(`No blocks in MFS`) return cb(null, []) @@ -100,7 +76,7 @@ function createMarkedSet (ipfs, callback) { return callback(err) } - const cids = res.flat().map(cid => cid.toV1().toString('base32')) + const cids = [].concat(...res).map(cid => cid.toV1().toString('base32')) return callback(null, new Set(cids)) }) } diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js new file mode 100644 index 0000000000..912ddd5a41 --- /dev/null +++ b/src/core/components/pin/pin-manager.js @@ -0,0 +1,301 @@ +/* eslint max-nested-callbacks: ["error", 8] */ +'use strict' + +const { DAGNode, DAGLink, util } = require('ipld-dag-pb') +const CID = require('cids') +const map = require('async/map') +const series = require('async/series') +const parallel = require('async/parallel') +const eachLimit = require('async/eachLimit') +const waterfall = require('async/waterfall') +const detectLimit = require('async/detectLimit') +const { Key } = require('interface-datastore') +const errCode = require('err-code') +const multicodec = require('multicodec') + +const createPinSet = require('./pin-set') +const { EMPTY_KEY_HASH } = createPinSet + +// arbitrary limit to the number of concurrent dag operations +const concurrencyLimit = 300 +const PIN_DS_KEY = new Key('/local/pins') + +function toB58String (hash) { + return new CID(hash).toBaseEncodedString() +} + +function invalidPinTypeErr (type) { + const errMsg = `Invalid type '${type}', must be one of {direct, indirect, recursive, all}` + return errCode(new Error(errMsg), 'ERR_INVALID_PIN_TYPE') +} + +const PinTypes = { + direct: 'direct', + recursive: 'recursive', + indirect: 'indirect', + all: 'all' +} + +class PinManager { + constructor (repo, dag, log) { + this.repo = repo + this.dag = dag + this.log = log + this.pinset = createPinSet(dag) + this.directPins = new Set() + this.recursivePins = new Set() + } + + directKeys () { + return Array.from(this.directPins).map(key => new CID(key).buffer) + } + + recursiveKeys () { + return Array.from(this.recursivePins).map(key => new CID(key).buffer) + } + + getIndirectKeys (callback) { + const indirectKeys = new Set() + eachLimit(this.recursiveKeys(), concurrencyLimit, (multihash, cb) => { + this.dag._getRecursive(multihash, (err, nodes) => { + if (err) { + return cb(err) + } + + map(nodes, (node, cb) => util.cid(util.serialize(node), { + cidVersion: 0 + }).then(cid => cb(null, cid), cb), (err, cids) => { + if (err) { + return cb(err) + } + + cids + .map(cid => cid.toString()) + // recursive pins pre-empt indirect pins + .filter(key => !this.recursivePins.has(key)) + .forEach(key => indirectKeys.add(key)) + + cb() + }) + }) + }, (err) => { + if (err) { return callback(err) } + callback(null, Array.from(indirectKeys)) + }) + } + + // Encode and write pin key sets to the datastore: + // a DAGLink for each of the recursive and direct pinsets + // a DAGNode holding those as DAGLinks, a kind of root pin + flushPins (callback) { + let dLink, rLink, root + series([ + // create a DAGLink to the node with direct pins + cb => waterfall([ + cb => this.pinset.storeSet(this.directKeys(), cb), + ({ node, cid }, cb) => { + try { + cb(null, new 
DAGLink(PinTypes.direct, node.size, cid)) + } catch (err) { + cb(err) + } + }, + (link, cb) => { dLink = link; cb(null) } + ], cb), + + // create a DAGLink to the node with recursive pins + cb => waterfall([ + cb => this.pinset.storeSet(this.recursiveKeys(), cb), + ({ node, cid }, cb) => { + try { + cb(null, new DAGLink(PinTypes.recursive, node.size, cid)) + } catch (err) { + cb(err) + } + }, + (link, cb) => { rLink = link; cb(null) } + ], cb), + + // the pin-set nodes link to a special 'empty' node, so make sure it exists + cb => { + let empty + + try { + empty = DAGNode.create(Buffer.alloc(0)) + } catch (err) { + return cb(err) + } + + this.dag.put(empty, { + version: 0, + format: multicodec.DAG_PB, + hashAlg: multicodec.SHA2_256, + preload: false + }, cb) + }, + + // create a root node with DAGLinks to the direct and recursive DAGs + cb => { + let node + + try { + node = DAGNode.create(Buffer.alloc(0), [dLink, rLink]) + } catch (err) { + return cb(err) + } + + root = node + this.dag.put(root, { + version: 0, + format: multicodec.DAG_PB, + hashAlg: multicodec.SHA2_256, + preload: false + }, (err, cid) => { + if (!err) { + root.multihash = cid.buffer + } + cb(err) + }) + }, + + // hack for CLI tests + cb => this.repo.closed ? this.repo.open(cb) : cb(null, null), + + // save root to datastore under a consistent key + cb => this.repo.datastore.put(PIN_DS_KEY, root.multihash, cb) + ], (err, res) => { + if (err) { return callback(err) } + this.log(`Flushed pins with root: ${root}`) + return callback(null, root) + }) + } + + load (callback) { + waterfall([ + // hack for CLI tests + (cb) => this.repo.closed ? this.repo.datastore.open(cb) : cb(null, null), + (_, cb) => this.repo.datastore.has(PIN_DS_KEY, cb), + (has, cb) => has ? cb() : cb(new Error('No pins to load')), + (cb) => this.repo.datastore.get(PIN_DS_KEY, cb), + (mh, cb) => { + this.dag.get(new CID(mh), '', { preload: false }, cb) + } + ], (err, pinRoot) => { + if (err) { + if (err.message === 'No pins to load') { + this.log('No pins to load') + return callback() + } else { + return callback(err) + } + } + + parallel([ + cb => this.pinset.loadSet(pinRoot.value, PinTypes.recursive, cb), + cb => this.pinset.loadSet(pinRoot.value, PinTypes.direct, cb) + ], (err, keys) => { + if (err) { return callback(err) } + const [ rKeys, dKeys ] = keys + + this.directPins = new Set(dKeys.map(toB58String)) + this.recursivePins = new Set(rKeys.map(toB58String)) + + this.log('Loaded pins from the datastore') + return callback(null) + }) + }) + } + + isPinnedWithType (multihash, type, callback) { + const key = toB58String(multihash) + const { recursive, direct, all } = PinTypes + + // recursive + if ((type === recursive || type === all) && this.recursivePins.has(key)) { + return callback(null, { + key, + pinned: true, + reason: recursive + }) + } + + if (type === recursive) { + return callback(null, { + key, + pinned: false + }) + } + + // direct + if ((type === direct || type === all) && this.directPins.has(key)) { + return callback(null, { + key, + pinned: true, + reason: direct + }) + } + + if (type === direct) { + return callback(null, { + key, + pinned: false + }) + } + + // indirect (default) + // check each recursive key to see if multihash is under it + // arbitrary limit, enables handling 1000s of pins. 
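// Callers of isPinnedWithType receive { key, pinned, reason }, where for
// indirect pins the reason is the CID of the recursive root covering the
// key. A hedged usage sketch: `manager` stands for an already-constructed
// PinManager instance, and the hash is the pinner's well-known empty block,
// shown purely for illustration:
function logPinState (manager, cb) {
  const hash = 'QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n'
  manager.isPinnedWithType(hash, PinTypes.all, (err, res) => {
    if (err) return cb(err)
    // res.reason is 'direct', 'recursive', or a covering recursive root CID
    cb(null, res.pinned ? `pinned (${res.reason})` : 'not pinned')
  })
}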
+ detectLimit(this.recursiveKeys().map(key => new CID(key)), concurrencyLimit, (cid, cb) => { + waterfall([ + (done) => this.dag.get(cid, '', { preload: false }, done), + (result, done) => done(null, result.value), + (node, done) => this.pinset.hasDescendant(node, key, done) + ], cb) + }, (err, cid) => callback(err, { + key, + pinned: Boolean(cid), + reason: cid + })) + } + + // Gets CIDs of blocks used internally by the pinner + getInternalBlocks (callback) { + this.repo.datastore.get(PIN_DS_KEY, (err, mh) => { + if (err) { + if (err.code === 'ERR_NOT_FOUND') { + this.log(`No pinned blocks`) + return callback(null, []) + } + return callback(new Error(`Could not get pin sets root from datastore: ${err.message}`)) + } + + const cid = new CID(mh) + this.dag.get(cid, '', { preload: false }, (err, obj) => { + if (err) { + return callback(new Error(`Could not get pin sets from store: ${err.message}`)) + } + + // "Empty block" used by the pinner + const emptyBlockCid = new CID(EMPTY_KEY_HASH) + + // The pinner stores an object that has two links to pin sets: + // 1. The directly pinned CIDs + // 2. The recursively pinned CIDs + const pinSetCids = [cid, ...obj.value.Links.map(l => l.Hash)] + + callback(null, pinSetCids.concat([emptyBlockCid])) + }) + }) + } + + // Returns an error if the pin type is invalid + static checkPinType (type) { + if (typeof type !== 'string' || !Object.keys(PinTypes).includes(type)) { + return invalidPinTypeErr(type) + } + } +} + +PinManager.PinTypes = PinTypes + +module.exports = PinManager diff --git a/src/core/components/pin-set.js b/src/core/components/pin/pin-set.js similarity index 99% rename from src/core/components/pin-set.js rename to src/core/components/pin/pin-set.js index 6f3a9f98dc..5dc0564a13 100644 --- a/src/core/components/pin-set.js +++ b/src/core/components/pin/pin-set.js @@ -270,3 +270,5 @@ exports = module.exports = function (dag) { } return pinSet } + +module.exports.EMPTY_KEY_HASH = emptyKeyHash diff --git a/src/core/components/pin.proto.js b/src/core/components/pin/pin.proto.js similarity index 100% rename from src/core/components/pin.proto.js rename to src/core/components/pin/pin.proto.js diff --git a/src/core/components/repo.js b/src/core/components/repo.js index 76d9262096..25b7cf02ea 100644 --- a/src/core/components/repo.js +++ b/src/core/components/repo.js @@ -38,7 +38,7 @@ module.exports = function repo (self) { }) }), - gc: require('./gc')(self), + gc: require('./pin/gc')(self), stat: promisify((options, callback) => { if (typeof options === 'function') { diff --git a/test/core/pin-set.js b/test/core/pin-set.js index 3df518bc27..a36567c37c 100644 --- a/test/core/pin-set.js +++ b/test/core/pin-set.js @@ -19,7 +19,7 @@ const { const CID = require('cids') const IPFS = require('../../src/core') -const createPinSet = require('../../src/core/components/pin-set') +const createPinSet = require('../../src/core/components/pin/pin-set') const createTempRepo = require('../utils/create-repo-nodejs') const defaultFanout = 256 From 8de0c2b4168680417bf34676f05f9e9f495684d2 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 22 May 2019 16:53:37 -0400 Subject: [PATCH 22/56] fix: browser gc tests --- test/core/gc.spec.js | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 8b7a48645b..a669c3dd03 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -7,20 +7,24 @@ const dirtyChai = require('dirty-chai') const expect = chai.expect chai.use(dirtyChai) 
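// p-event (required in this file) converts a single EventEmitter emission
// into an awaitable promise; the tests below use it to order adds, pins and
// GC around gc-lock lifecycle events. A minimal standalone sketch, with a
// distinct local name and an illustrative event payload:
const pEventSketch = require('p-event')
const EventEmitterSketch = require('events')
async function awaitLockRequest () {
  const emitter = new EventEmitterSketch()
  const request = pEventSketch(emitter, 'readLock request')
  setImmediate(() => emitter.emit('readLock request', 1))
  return request // resolves with the first emitted argument: the lock id 1
}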
-const fs = require('fs') - const IPFS = require('../../src/core') const createTempRepo = require('../utils/create-repo-nodejs') const pEvent = require('p-event') describe('gc', function () { - const fixtures = [ - 'test/fixtures/planets/mercury/wiki.md', - 'test/fixtures/planets/solar-system.md' - ].map(path => ({ - path, - content: fs.readFileSync(path) - })) + const fixtures = [{ + path: 'test/my/path1', + content: Buffer.from('path1') + }, { + path: 'test/my/path2', + content: Buffer.from('path2') + }, { + path: 'test/my/path3', + content: Buffer.from('path3') + }, { + path: 'test/my/path4', + content: Buffer.from('path4') + }] let ipfs let repo @@ -98,7 +102,7 @@ describe('gc', function () { // Add blocks to IPFS // Note: add operation will take a read lock const addLockRequested = pEvent(ipfs._gcLock, 'readLock request') - const add1 = ipfs.add(fixtures[0], { pin: true }) + const add1 = ipfs.add(fixtures[2], { pin: true }) // Once add lock has been requested, start GC await addLockRequested @@ -108,7 +112,7 @@ describe('gc', function () { // Once GC has started, start second add await gcStarted - const add2 = ipfs.add(fixtures[1], { pin: true }) + const add2 = ipfs.add(fixtures[3], { pin: true }) const deleted = (await gc).map(i => i.cid) const add1Res = (await add1)[0].hash From 28b615d72420dad2ba600024b7e515d9bf657cba Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 22 May 2019 18:33:51 -0400 Subject: [PATCH 23/56] fix: gc parsing of block cid in browser --- src/core/components/pin.js | 6 +++--- src/core/components/pin/gc.js | 24 +++++++++++++++++------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index 4b77269343..43da3cd540 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -268,9 +268,9 @@ module.exports = (self) => { } }), - _getInternalBlocks: (cb) => pinManager.getInternalBlocks(cb), - - _load: (cb) => pinManager.load(cb) + _isPinnedWithType: promisify(pinManager.isPinnedWithType.bind(pinManager)), + _getInternalBlocks: promisify(pinManager.getInternalBlocks.bind(pinManager)), + _load: promisify(pinManager.load.bind(pinManager)) } return pin diff --git a/src/core/components/pin/gc.js b/src/core/components/pin/gc.js index 4ff06db698..4ac98393d5 100644 --- a/src/core/components/pin/gc.js +++ b/src/core/components/pin/gc.js @@ -56,12 +56,17 @@ function createMarkedSet (ipfs, callback) { }), // Blocks used internally by the pinner - (cb) => ipfs.pin._getInternalBlocks(cb), + (cb) => ipfs.pin._getInternalBlocks((err, cids) => { + if (err) { + return cb(new Error(`Could not list pinner internal blocks: ${err.message}`)) + } + log(`Found ${cids.length} pinner internal blocks`) + cb(null, cids) + }), // The MFS root and all its descendants (cb) => ipfs._repo.root.get(MFS_ROOT_DS_KEY, (err, mh) => { if (err) { - console.error(err) if (err.code === 'ERR_NOT_FOUND') { log(`No blocks in MFS`) return cb(null, []) @@ -87,7 +92,7 @@ function getDescendants (ipfs, cid, callback) { if (err) { return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) } - log(`Found ${refs.length} MFS blocks`) + log(`Found ${1 + refs.length} MFS blocks`) callback(null, [cid, ...refs.map(r => new CID(r.ref))]) }) } @@ -98,19 +103,24 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { // The blocks variable has the form { { key: Key() }, { key: Key() }, ... 
} const unreferenced = [] const res = [] + let errCount = 0 for (const { key: k } of blocks) { try { const cid = dsKeyToCid(k) - if (!markedSet.has(cid.toV1().toString('base32'))) { + const b32 = cid.toV1().toString('base32') + if (!markedSet.has(b32)) { unreferenced.push(cid) } } catch (err) { - res.push({ err: new Error(`Could not convert block with key '${k}' to CID: ${err.message}`) }) + errCount++ + const msg = `Could not convert block with key '${k}' to CID: ${err.message}` + log(msg) + res.push({ err: new Error(msg) }) } } const msg = `Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. ` + - `Deleting ${unreferenced.length} blocks.` + `Deleting ${unreferenced.length} blocks.` + (errCount ? ` (${errCount} errors)` : '') log(msg) mapLimit(unreferenced, BLOCK_RM_CONCURRENCY, (cid, cb) => { @@ -133,5 +143,5 @@ function dsKeyToCid (key) { // Block key is of the form / const decoder = new base32.Decoder() const buff = decoder.write(key.toString().slice(1)).finalize() - return new CID(buff) + return new CID(Buffer.from(buff)) } From 05ae894141fd9042439de0e65cd508ccb52a86c1 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 23 May 2019 11:52:19 -0400 Subject: [PATCH 24/56] test: add gc-lock tests --- test/core/gc-lock.spec.js | 197 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 test/core/gc-lock.spec.js diff --git a/test/core/gc-lock.spec.js b/test/core/gc-lock.spec.js new file mode 100644 index 0000000000..41ea1b0d57 --- /dev/null +++ b/test/core/gc-lock.spec.js @@ -0,0 +1,197 @@ +/* eslint max-nested-callbacks: ["error", 8] */ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +const dirtyChai = require('dirty-chai') +const expect = chai.expect +chai.use(dirtyChai) + +const parallel = require('async/parallel') +const pull = require('pull-stream') +const GCLock = require('../../src/core/components/pin/gc-lock') + +const cbTakeLock = (type, lock, out, id, duration) => { + return (cb) => lock[type + 'Lock']((lockCb) => { + out.push(`${type} ${id} start`) + setTimeout(() => { + out.push(`${type} ${id} end`) + lockCb() + }, duration) + }, cb) +} +const cbReadLock = (lock, out, id, duration) => { + return cbTakeLock('read', lock, out, id, duration) +} +const cbWriteLock = (lock, out, id, duration) => { + return cbTakeLock('write', lock, out, id, duration) +} + +const pullTakeLock = (type, lock, out, id, duration) => { + const lockFn = type === 'read' ? 
'pullReadLock' : 'pullWriteLock' + const vals = ['a', 'b', 'c'] + return (cb) => { + pull( + pull.values(vals), + lock[lockFn](() => { + let started = false + return pull( + pull.through((i) => { + if (!started) { + out.push(`${type} ${id} start`) + started = true + } + }), + pull.asyncMap((i, cb) => { + setTimeout(() => cb(null, i), duration / vals.length) + }) + ) + }), + pull.collect(() => { + out.push(`${type} ${id} end`) + cb() + }) + ) + } +} +const pullReadLock = (lock, out, id, duration) => { + return pullTakeLock('read', lock, out, id, duration) +} +const pullWriteLock = (lock, out, id, duration) => { + return pullTakeLock('write', lock, out, id, duration) +} + +const expectResult = (out, exp, done) => { + return () => { + try { + expect(out).to.eql(exp) + } catch (err) { + return done(err) + } + done() + } +} + +const runTests = (suiteName, { readLock, writeLock }) => { + describe(suiteName, () => { + it('multiple simultaneous reads', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + readLock(lock, out, 1, 100), + readLock(lock, out, 2, 200), + readLock(lock, out, 3, 300) + ], expectResult(out, [ + 'read 1 start', + 'read 2 start', + 'read 3 start', + 'read 1 end', + 'read 2 end', + 'read 3 end' + ], done)) + }) + + it('multiple simultaneous writes', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + writeLock(lock, out, 1, 100), + writeLock(lock, out, 2, 200), + writeLock(lock, out, 3, 300) + ], expectResult(out, [ + 'write 1 start', + 'write 1 end', + 'write 2 start', + 'write 2 end', + 'write 3 start', + 'write 3 end' + ], done)) + }) + + it('read then write then read', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + readLock(lock, out, 1, 100), + writeLock(lock, out, 1, 100), + readLock(lock, out, 2, 100) + ], expectResult(out, [ + 'read 1 start', + 'read 1 end', + 'write 1 start', + 'write 1 end', + 'read 2 start', + 'read 2 end' + ], done)) + }) + + it('write then read then write', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + writeLock(lock, out, 1, 100), + readLock(lock, out, 1, 100), + writeLock(lock, out, 2, 100) + ], expectResult(out, [ + 'write 1 start', + 'write 1 end', + 'read 1 start', + 'read 1 end', + 'write 2 start', + 'write 2 end' + ], done)) + }) + + it('two simultaneous reads then write then read', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + readLock(lock, out, 1, 100), + readLock(lock, out, 2, 200), + writeLock(lock, out, 1, 100), + readLock(lock, out, 3, 100) + ], expectResult(out, [ + 'read 1 start', + 'read 2 start', + 'read 1 end', + 'read 2 end', + 'write 1 start', + 'write 1 end', + 'read 3 start', + 'read 3 end' + ], done)) + }) + + it('two simultaneous writes then read then write', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + writeLock(lock, out, 1, 100), + writeLock(lock, out, 2, 100), + readLock(lock, out, 1, 100), + writeLock(lock, out, 3, 100) + ], expectResult(out, [ + 'write 1 start', + 'write 1 end', + 'write 2 start', + 'write 2 end', + 'read 1 start', + 'read 1 end', + 'write 3 start', + 'write 3 end' + ], done)) + }) + }) +} + +describe('gc-lock', function () { + runTests('cb style lock', { + readLock: cbReadLock, + writeLock: cbWriteLock + }) + + runTests('pull stream style lock', { + readLock: pullReadLock, + writeLock: pullWriteLock + }) +}) From 8b524448b5f45237676a70778b41769a29356526 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 23 May 2019 12:48:24 -0400 Subject: [PATCH 25/56] 
fix: gc lock error handling --- src/core/components/pin/gc-lock.js | 13 ++-- test/core/gc-lock.spec.js | 110 +++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 9 deletions(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 4a427c6d13..f9e0728025 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -80,8 +80,9 @@ class PullLocker { // Returns a Promise that resolves when the locked piece of code completes locked () { - return new Promise((resolve) => { - this.releaseLock = resolve + return new Promise((resolve, reject) => { + this.releaseLock = (err) => err ? reject(err) : resolve() + log(`[${this.lockId}] ${this.type} (pull) started`) this.emitter.emit(`${this.type} start`, this.lockId) @@ -101,6 +102,10 @@ class PullLocker { this.emitter.emit(`${this.type} request`, this.lockId) // Request the lock this.lock = this.mutex[this.type](() => this.locked()) + // If there is an error, it gets passed through to the caller using + // pull streams, so here we just catch the error and ignore it so + // that there isn't an UnhandledPromiseRejectionWarning + this.lock.catch(() => {}) } // Wait for the mutex to give us permission @@ -111,10 +116,10 @@ class PullLocker { // Releases the lock release () { - return pull.through(null, () => { + return pull.through(null, (err) => { log(`[${this.lockId}] ${this.type} (pull) released`) this.emitter.emit(`${this.type} release`, this.lockId) - this.releaseLock() + this.releaseLock(err) }) } } diff --git a/test/core/gc-lock.spec.js b/test/core/gc-lock.spec.js index 41ea1b0d57..96e9a3c838 100644 --- a/test/core/gc-lock.spec.js +++ b/test/core/gc-lock.spec.js @@ -1,4 +1,3 @@ -/* eslint max-nested-callbacks: ["error", 8] */ /* eslint-env mocha */ 'use strict' @@ -26,6 +25,24 @@ const cbReadLock = (lock, out, id, duration) => { const cbWriteLock = (lock, out, id, duration) => { return cbTakeLock('write', lock, out, id, duration) } +const cbTakeLockError = (type, lock, out, errs, id, duration) => { + return (cb) => lock[type + 'Lock']((lockCb) => { + out.push(`${type} ${id} start`) + setTimeout(() => { + out.push(`${type} ${id} error`) + lockCb(new Error('err')) + }, duration) + }, (err) => { + errs.push(err) + cb() + }) +} +const cbReadLockError = (lock, out, errs, id, duration) => { + return cbTakeLockError('read', lock, out, errs, id, duration) +} +const cbWriteLockError = (lock, out, errs, id, duration) => { + return cbTakeLockError('write', lock, out, errs, id, duration) +} const pullTakeLock = (type, lock, out, id, duration) => { const lockFn = type === 'read' ? 'pullReadLock' : 'pullWriteLock' @@ -60,11 +77,54 @@ const pullReadLock = (lock, out, id, duration) => { const pullWriteLock = (lock, out, id, duration) => { return pullTakeLock('write', lock, out, id, duration) } +const pullTakeLockError = (type, lock, out, errs, id, duration) => { + const lockFn = type === 'read' ? 
'pullReadLock' : 'pullWriteLock' + const vals = ['a', 'b', 'c'] + return (cb) => { + pull( + pull.values(vals), + lock[lockFn](() => { + let started = false + return pull( + pull.through((i) => { + if (!started) { + out.push(`${type} ${id} start`) + started = true + } + }), + pull.asyncMap((i, cb) => { + setTimeout(() => cb(new Error('err')), duration) + }) + ) + }), + pull.collect((err) => { + out.push(`${type} ${id} error`) + errs.push(err) + cb() + }) + ) + } +} +const pullReadLockError = (lock, out, errs, id, duration) => { + return pullTakeLockError('read', lock, out, errs, id, duration) +} +const pullWriteLockError = (lock, out, errs, id, duration) => { + return pullTakeLockError('write', lock, out, errs, id, duration) +} -const expectResult = (out, exp, done) => { +const expectResult = (out, exp, errs, expErrCount, done) => { + if (typeof errs === 'function') { + done = errs + } return () => { try { expect(out).to.eql(exp) + if (typeof expErrCount === 'number') { + expect(errs.length).to.eql(expErrCount) + for (const e of errs) { + expect(e.message).to.eql('err') + } + } } catch (err) { return done(err) } @@ -72,7 +132,7 @@ const expectResult = (out, exp, done) => { } } -const runTests = (suiteName, { readLock, writeLock }) => { +const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockError }) => { describe(suiteName, () => { it('multiple simultaneous reads', (done) => { const lock = new GCLock() @@ -181,17 +241,57 @@ const runTests = (suiteName, { readLock, writeLock }) => { 'write 3 end' ], done)) }) + + it('simultaneous reads with error then write', (done) => { + const lock = new GCLock() + const out = [] + const errs = [] + parallel([ + readLockError(lock, out, errs, 1, 100), + readLock(lock, out, 2, 200), + writeLock(lock, out, 1, 100) + ], expectResult(out, [ + 'read 1 start', + 'read 2 start', + 'read 1 error', + 'read 2 end', + 'write 1 start', + 'write 1 end' + ], errs, 1, done)) + }) + + it('simultaneous writes with error then read', (done) => { + const lock = new GCLock() + const out = [] + const errs = [] + parallel([ + writeLockError(lock, out, errs, 1, 100), + writeLock(lock, out, 2, 100), + readLock(lock, out, 1, 100) + ], expectResult(out, [ + 'write 1 start', + 'write 1 error', + 'write 2 start', + 'write 2 end', + 'read 1 start', + 'read 1 end' + ], errs, 1, done)) + }) }) } describe('gc-lock', function () { runTests('cb style lock', { readLock: cbReadLock, - writeLock: cbWriteLock + writeLock: cbWriteLock, + readLockError: cbReadLockError, + writeLockError: cbWriteLockError }) runTests('pull stream style lock', { readLock: pullReadLock, - writeLock: pullWriteLock + writeLock: pullWriteLock, + readLockError: pullReadLockError, + writeLockError: pullWriteLockError }) }) From 0c7cfdf6a197e733f6a4924440a9ca75e49b1256 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 23 May 2019 18:15:32 -0400 Subject: [PATCH 26/56] fix: gc - take pin lock after resolve --- src/core/components/pin.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index 43da3cd540..d71aa3d1f6 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -36,7 +36,7 @@ module.exports = (self) => { resolvePath(self.object, paths, (err, mhs) => { if (err) { return callback(err) } - const pin = (pinComplete) => { + const pinAdd = (pinComplete) => { // verify that each hash can be pinned map(mhs, (multihash, cb) => { const cid = new CID(multihash) @@ -91,9 +91,9 @@ module.exports = (self) 
=> { // is complete, so don't take a second lock here const lock = options.lock !== false if (lock) { - self._gcLock.readLock(pin, callback) + self._gcLock.readLock(pinAdd, callback) } else { - pin(callback) + pinAdd(callback) } }) }), From 81e3dd0be0582d2e09cfca742e366f23230bae3c Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 23 May 2019 19:12:19 -0400 Subject: [PATCH 27/56] fix: make sure each GCLock instance uses distinct mutex --- src/core/components/pin/gc-lock.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index f9e0728025..3ec3256111 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -8,7 +8,13 @@ const log = require('debug')('ipfs:gc:lock') class GCLock extends EventEmitter { constructor () { super() - this.mutex = mortice() + + // Ensure that we get a different mutex for each instance of GCLock + // (There should only be one GCLock instance per IPFS instance, but + // there may be multiple IPFS instances, eg in unit tests) + const randId = (~~(Math.random() * 1e9)).toString(36) + Date.now() + this.mutex = mortice(randId) + this.lockId = 0 } From 7898ca2d965fd6cf358df0e31e451daa2af56913 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 24 May 2019 11:07:58 -0400 Subject: [PATCH 28/56] fix: choose non-overlapping port for GC test --- test/core/gc.spec.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index a669c3dd03..790b8a4068 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -11,7 +11,7 @@ const IPFS = require('../../src/core') const createTempRepo = require('../utils/create-repo-nodejs') const pEvent = require('p-event') -describe('gc', function () { +describe('pin-gc', function () { const fixtures = [{ path: 'test/my/path1', content: Buffer.from('path1') @@ -35,7 +35,13 @@ describe('gc', function () { ipfs = new IPFS({ repo, config: { - Bootstrap: [] + Bootstrap: [], + Addresses: { + Swarm: [ + // Pick a port that doesn't overlap with the test in pin.js + '/ip4/0.0.0.0/tcp/4102' + ] + } } }) ipfs.on('ready', done) From bdcbddb2b41046520146f204efd964c99d7debed Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 24 May 2019 12:55:46 -0400 Subject: [PATCH 29/56] fix: better gc test port config --- test/core/gc.spec.js | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 790b8a4068..1ab371ce9f 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -7,11 +7,12 @@ const dirtyChai = require('dirty-chai') const expect = chai.expect chai.use(dirtyChai) +const isNode = require('detect-node') +const pEvent = require('p-event') const IPFS = require('../../src/core') const createTempRepo = require('../utils/create-repo-nodejs') -const pEvent = require('p-event') -describe('pin-gc', function () { +describe('gc', function () { const fixtures = [{ path: 'test/my/path1', content: Buffer.from('path1') @@ -32,18 +33,13 @@ describe('pin-gc', function () { before(function (done) { this.timeout(20 * 1000) repo = createTempRepo() - ipfs = new IPFS({ - repo, - config: { - Bootstrap: [], - Addresses: { - Swarm: [ - // Pick a port that doesn't overlap with the test in pin.js - '/ip4/0.0.0.0/tcp/4102' - ] - } + let config = { Bootstrap: [] } + if (isNode) { + config.Addresses = { + Swarm: ['/ip4/127.0.0.1/tcp/0'] } - }) + } + ipfs = new IPFS({ repo, config }) 
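+    // Swarm address '/ip4/127.0.0.1/tcp/0' asks the OS for any free port,
+    // so parallel test suites no longer need hand-picked, non-overlapping
+    // ports like the 4102 used in the previous commit.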
ipfs.on('ready', done) }) From a07ed7f182682d1853eb0e297462c6a423d4702e Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 24 May 2019 13:48:59 -0400 Subject: [PATCH 30/56] test: increase timeout for repo gc test --- test/cli/repo.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/cli/repo.js b/test/cli/repo.js index 9fd5d8f19d..b49c6ea396 100644 --- a/test/cli/repo.js +++ b/test/cli/repo.js @@ -21,7 +21,9 @@ describe('repo', () => runOnAndOff((thing) => { }) // Note: There are more comprehensive GC tests in interface-js-ipfs-core - it('should run garbage collection', async () => { + it('should run garbage collection', async function () { + this.timeout(60000) + // Add a file to IPFS const cid = (await ipfs(`add -Q ${fixturePath}`)).trim() From ef86efc9382cc175c246b5e2e16f4524b2a5dcea Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 28 May 2019 14:07:46 -0400 Subject: [PATCH 31/56] fix: webworkers + mortice --- src/core/components/pin/gc-lock.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 3ec3256111..8601c47da8 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -13,7 +13,9 @@ class GCLock extends EventEmitter { // (There should only be one GCLock instance per IPFS instance, but // there may be multiple IPFS instances, eg in unit tests) const randId = (~~(Math.random() * 1e9)).toString(36) + Date.now() - this.mutex = mortice(randId) + this.mutex = mortice(randId, { + singleProcess: true + }) this.lockId = 0 } From 75159a068f461661d14ba68accb3c3269163d3f8 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 28 May 2019 15:30:10 -0400 Subject: [PATCH 32/56] chore: refactor mortice options --- src/core/components/pin/gc-lock.js | 4 ++-- src/core/index.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 8601c47da8..2dc540b8fb 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -6,7 +6,7 @@ const EventEmitter = require('events') const log = require('debug')('ipfs:gc:lock') class GCLock extends EventEmitter { - constructor () { + constructor (repoOwner) { super() // Ensure that we get a different mutex for each instance of GCLock @@ -14,7 +14,7 @@ class GCLock extends EventEmitter { // there may be multiple IPFS instances, eg in unit tests) const randId = (~~(Math.random() * 1e9)).toString(36) + Date.now() this.mutex = mortice(randId, { - singleProcess: true + singleProcess: repoOwner }) this.lockId = 0 diff --git a/src/core/index.js b/src/core/index.js index 82a92854e5..89a3d2ae72 100644 --- a/src/core/index.js +++ b/src/core/index.js @@ -91,7 +91,7 @@ class IPFS extends EventEmitter { this._ipns = undefined // eslint-disable-next-line no-console this._print = this._options.silent ? 
this.log : console.log - this._gcLock = new GCLock() + this._gcLock = new GCLock(this._options.repoOwner) // IPFS Core exposed components // - for booting up a node From c2b5ef62185eb1f128f7da39508b85b4f17c0683 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 28 May 2019 15:32:59 -0400 Subject: [PATCH 33/56] fix: gc rm test on Windows --- test/core/gc.spec.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 1ab371ce9f..cf9593c6a6 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -211,7 +211,8 @@ describe('gc', function () { const cid2 = (await ipfs.block.put(Buffer.from('block to pin rm 2'), null)).cid // Pin blocks - await Promise.all([ipfs.pin.add(cid1), ipfs.pin.add(cid2)]) + await ipfs.pin.add(cid1) + await ipfs.pin.add(cid2) // Unpin first block // Note: pin rm will take a read lock From 7cfc53b87f8ca8c926e9d3c8f62a5ca16c78ba09 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 7 Jun 2019 15:40:45 -0400 Subject: [PATCH 34/56] fix: ensure gc filters all internal pins --- src/core/components/pin/pin-manager.js | 14 ++++---- src/core/components/pin/pin-set.js | 29 ++++++++++++--- test/core/pin-set.js | 50 ++++++++++++++++++++++++-- 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js index 912ddd5a41..cecad5f37d 100644 --- a/src/core/components/pin/pin-manager.js +++ b/src/core/components/pin/pin-manager.js @@ -14,7 +14,6 @@ const errCode = require('err-code') const multicodec = require('multicodec') const createPinSet = require('./pin-set') -const { EMPTY_KEY_HASH } = createPinSet // arbitrary limit to the number of concurrent dag operations const concurrencyLimit = 300 @@ -275,15 +274,18 @@ class PinManager { return callback(new Error(`Could not get pin sets from store: ${err.message}`)) } - // "Empty block" used by the pinner - const emptyBlockCid = new CID(EMPTY_KEY_HASH) - // The pinner stores an object that has two links to pin sets: // 1. The directly pinned CIDs // 2. 
The recursively pinned CIDs - const pinSetCids = [cid, ...obj.value.Links.map(l => l.Hash)] + // If large enough, these pin sets may have links to buckets to hold + // the pins + this.pinset.getInternalCids(obj.value, (err, cids) => { + if (err) { + return callback(new Error(`Could not get pinner internal cids: ${err.message}`)) + } - callback(null, pinSetCids.concat([emptyBlockCid])) + callback(null, cids.concat(cid)) + }) }) }) } diff --git a/src/core/components/pin/pin-set.js b/src/core/components/pin/pin-set.js index 5dc0564a13..fd29be54bf 100644 --- a/src/core/components/pin/pin-set.js +++ b/src/core/components/pin/pin-set.js @@ -8,6 +8,7 @@ const varint = require('varint') const { DAGNode, DAGLink } = require('ipld-dag-pb') const multicodec = require('multicodec') const someSeries = require('async/someSeries') +const eachSeries = require('async/eachSeries') const eachOfSeries = require('async/eachOfSeries') const pbSchema = require('./pin.proto') @@ -239,6 +240,10 @@ exports = module.exports = function (dag) { }, walkItems: (node, step, callback) => { + pinSet.walkAll(node, step, () => {}, callback) + }, + + walkAll: (node, stepPin, stepBin, callback) => { let pbh try { pbh = readHeader(node) @@ -253,22 +258,38 @@ exports = module.exports = function (dag) { const linkHash = link.Hash.buffer if (!emptyKey.equals(linkHash)) { + stepBin(link, idx, pbh.data) + // walk the links of this fanout bin return dag.get(linkHash, '', { preload: false }, (err, res) => { if (err) { return eachCb(err) } - pinSet.walkItems(res.value, step, eachCb) + pinSet.walkItems(res.value, stepBin, eachCb) }) } } else { // otherwise, the link is a pin - step(link, idx, pbh.data) + stepPin(link, idx, pbh.data) } eachCb(null) }, callback) + }, + + getInternalCids: (rootNode, callback) => { + // "Empty block" used by the pinner + const cids = [new CID(emptyKey)] + + const step = link => cids.push(link.Hash) + eachSeries(rootNode.Links, (topLevelLink, cb) => { + cids.push(topLevelLink.Hash) + + dag.get(topLevelLink.Hash, '', { preload: false }, (err, res) => { + if (err) { return cb(err) } + + pinSet.walkAll(res.value, () => {}, step, cb) + }) + }, (err) => callback(err, cids)) } } return pinSet } - -module.exports.EMPTY_KEY_HASH = emptyKeyHash diff --git a/test/core/pin-set.js b/test/core/pin-set.js index a36567c37c..c85890a209 100644 --- a/test/core/pin-set.js +++ b/test/core/pin-set.js @@ -22,6 +22,7 @@ const IPFS = require('../../src/core') const createPinSet = require('../../src/core/components/pin/pin-set') const createTempRepo = require('../utils/create-repo-nodejs') +const emptyKeyHash = 'QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n' const defaultFanout = 256 const maxItems = 8192 @@ -181,12 +182,12 @@ describe('pinSet', function () { }) }) - describe('walkItems', function () { + describe('walkAll', function () { it(`fails if node doesn't have a pin-set protobuf header`, function (done) { createNode('datum', (err, node) => { expect(err).to.not.exist() - pinSet.walkItems(node, () => {}, (err, res) => { + pinSet.walkAll(node, () => {}, () => {}, (err, res) => { expect(err).to.exist() expect(res).to.not.exist() done() @@ -194,6 +195,29 @@ describe('pinSet', function () { }) }) + it('visits all links of a root node', function (done) { + this.timeout(90 * 1000) + + const seen = [] + const walker = (link, idx, data) => seen.push({ link, idx, data }) + + createNodes(maxItems + 1, (err, nodes) => { + expect(err).to.not.exist() + + pinSet.storeSet(nodes, (err, result) => { + expect(err).to.not.exist() + + 
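+        // maxItems + 1 pins overflow the root pin-set into fanout buckets,
+        // so the walk should visit one link per bucket plus every stored
+        // pin: defaultFanout + maxItems + 1 entries in total.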
pinSet.walkAll(result.node, () => {}, walker, err => { + expect(err).to.not.exist() + expect(seen).to.have.length(maxItems + defaultFanout + 1) + done() + }) + }) + }) + }) + }) + + describe('walkItems', function () { it('visits all non-fanout links of a root node', function (done) { const seen = [] const walker = (link, idx, data) => seen.push({ link, idx, data }) @@ -218,4 +242,26 @@ describe('pinSet', function () { }) }) }) + + describe('getInternalCids', function () { + it('gets all links and empty key CID', function (done) { + createNodes(defaultFanout, (err, nodes) => { + expect(err).to.not.exist() + + pinSet.storeSet(nodes, (err, result) => { + expect(err).to.not.exist() + + const rootNode = DAGNode.create('pins', [{ Hash: result.cid }]) + pinSet.getInternalCids(rootNode, (err, cids) => { + expect(err).to.not.exist() + expect(cids.length).to.eql(2) + const cidStrs = cids.map(c => c.toString()) + expect(cidStrs).includes(emptyKeyHash) + expect(cidStrs).includes(result.cid.toString()) + done() + }) + }) + }) + }) + }) }) From 026158fe905962462ce874f130b09ae254412a27 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 7 Jun 2019 15:43:58 -0400 Subject: [PATCH 35/56] test: enable gc tests over ipfs-http-client --- test/http-api/interface.js | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/http-api/interface.js b/test/http-api/interface.js index b94004085f..a3419e4be5 100644 --- a/test/http-api/interface.js +++ b/test/http-api/interface.js @@ -152,15 +152,7 @@ describe('interface-ipfs-core over ipfs-http-client tests', () => { } })) - tests.repo(defaultCommonFactory, { - skip: [ - // repo.gc - { - name: 'gc', - reason: 'TODO: repo.gc is not implemented in js-ipfs yet!' - } - ] - }) + tests.repo(defaultCommonFactory) tests.stats(defaultCommonFactory) From bf4e731ed7623db3a9141f327355676917cb75eb Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 10 Jun 2019 09:55:56 -0400 Subject: [PATCH 36/56] chore: better gc logging --- src/core/components/pin/gc.js | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/core/components/pin/gc.js b/src/core/components/pin/gc.js index 4ac98393d5..342fb75f91 100644 --- a/src/core/components/pin/gc.js +++ b/src/core/components/pin/gc.js @@ -52,7 +52,9 @@ function createMarkedSet (ipfs, callback) { return cb(new Error(`Could not list pinned blocks: ${err.message}`)) } log(`Found ${pins.length} pinned blocks`) - cb(null, pins.map(p => new CID(p.hash))) + const cids = pins.map(p => new CID(p.hash)) + // log(' ' + cids.join('\n ')) + cb(null, cids) }), // Blocks used internally by the pinner @@ -61,6 +63,7 @@ function createMarkedSet (ipfs, callback) { return cb(new Error(`Could not list pinner internal blocks: ${err.message}`)) } log(`Found ${cids.length} pinner internal blocks`) + // log(' ' + cids.join('\n ')) cb(null, cids) }), @@ -92,8 +95,10 @@ function getDescendants (ipfs, cid, callback) { if (err) { return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) } - log(`Found ${1 + refs.length} MFS blocks`) - callback(null, [cid, ...refs.map(r => new CID(r.ref))]) + const cids = [cid, ...refs.map(r => new CID(r.ref))] + log(`Found ${cids.length} MFS blocks`) + // log(' ' + cids.join('\n ')) + callback(null, cids) }) } @@ -119,9 +124,10 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { } } - const msg = `Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. 
` + + const msg = `Marked set has ${markedSet.size} unique blocks. Blockstore has ${blocks.length} blocks. ` + `Deleting ${unreferenced.length} blocks.` + (errCount ? ` (${errCount} errors)` : '') log(msg) + // log(' ' + unreferenced.join('\n ')) mapLimit(unreferenced, BLOCK_RM_CONCURRENCY, (cid, cb) => { // Delete blocks from blockstore From 356e2637a67ad0ce558a4a85cc659cd8b4cdb1e4 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 14 Jun 2019 12:21:32 -0400 Subject: [PATCH 37/56] fix: pin walking --- src/core/components/pin/pin-set.js | 2 +- test/core/pin-set.js | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/core/components/pin/pin-set.js b/src/core/components/pin/pin-set.js index fd29be54bf..c3ef75d057 100644 --- a/src/core/components/pin/pin-set.js +++ b/src/core/components/pin/pin-set.js @@ -263,7 +263,7 @@ exports = module.exports = function (dag) { // walk the links of this fanout bin return dag.get(linkHash, '', { preload: false }, (err, res) => { if (err) { return eachCb(err) } - pinSet.walkItems(res.value, stepBin, eachCb) + pinSet.walkAll(res.value, stepPin, stepBin, eachCb) }) } } else { diff --git a/test/core/pin-set.js b/test/core/pin-set.js index c85890a209..5de033480b 100644 --- a/test/core/pin-set.js +++ b/test/core/pin-set.js @@ -198,8 +198,10 @@ describe('pinSet', function () { it('visits all links of a root node', function (done) { this.timeout(90 * 1000) - const seen = [] - const walker = (link, idx, data) => seen.push({ link, idx, data }) + const seenPins = [] + const walkerPin = (link, idx, data) => seenPins.push({ link, idx, data }) + const seenBins = [] + const walkerBin = (link, idx, data) => seenBins.push({ link, idx, data }) createNodes(maxItems + 1, (err, nodes) => { expect(err).to.not.exist() @@ -207,9 +209,10 @@ describe('pinSet', function () { pinSet.storeSet(nodes, (err, result) => { expect(err).to.not.exist() - pinSet.walkAll(result.node, () => {}, walker, err => { + pinSet.walkAll(result.node, walkerPin, walkerBin, err => { expect(err).to.not.exist() - expect(seen).to.have.length(maxItems + defaultFanout + 1) + expect(seenPins).to.have.length(maxItems + 1) + expect(seenBins).to.have.length(defaultFanout) done() }) }) From 58f34d6a480f22f0622d0cf1f4250b6131659e2c Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 21 Jun 2019 15:48:58 -0400 Subject: [PATCH 38/56] refactor: pin set walking --- src/core/components/pin/pin-set.js | 16 ++++++---------- test/core/pin-set.js | 16 +++++++--------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/core/components/pin/pin-set.js b/src/core/components/pin/pin-set.js index c3ef75d057..04a389bf2c 100644 --- a/src/core/components/pin/pin-set.js +++ b/src/core/components/pin/pin-set.js @@ -231,19 +231,15 @@ exports = module.exports = function (dag) { dag.get(link.Hash, '', { preload: false }, (err, res) => { if (err) { return callback(err) } const keys = [] - const step = link => keys.push(link.Hash.buffer) - pinSet.walkItems(res.value, step, err => { + const stepPin = link => keys.push(link.Hash.buffer) + pinSet.walkItems(res.value, { stepPin }, err => { if (err) { return callback(err) } return callback(null, keys) }) }) }, - walkItems: (node, step, callback) => { - pinSet.walkAll(node, step, () => {}, callback) - }, - - walkAll: (node, stepPin, stepBin, callback) => { + walkItems: (node, { stepPin = () => {}, stepBin = () => {} }, callback) => { let pbh try { pbh = readHeader(node) @@ -263,7 +259,7 @@ exports = module.exports = function (dag) { // walk 
the links of this fanout bin return dag.get(linkHash, '', { preload: false }, (err, res) => { if (err) { return eachCb(err) } - pinSet.walkAll(res.value, stepPin, stepBin, eachCb) + pinSet.walkItems(res.value, { stepPin, stepBin }, eachCb) }) } } else { @@ -279,14 +275,14 @@ exports = module.exports = function (dag) { // "Empty block" used by the pinner const cids = [new CID(emptyKey)] - const step = link => cids.push(link.Hash) + const stepBin = link => cids.push(link.Hash) eachSeries(rootNode.Links, (topLevelLink, cb) => { cids.push(topLevelLink.Hash) dag.get(topLevelLink.Hash, '', { preload: false }, (err, res) => { if (err) { return cb(err) } - pinSet.walkAll(res.value, () => {}, step, cb) + pinSet.walkItems(res.value, { stepBin }, cb) }) }, (err) => callback(err, cids)) } diff --git a/test/core/pin-set.js b/test/core/pin-set.js index 5de033480b..180c32167f 100644 --- a/test/core/pin-set.js +++ b/test/core/pin-set.js @@ -182,12 +182,12 @@ describe('pinSet', function () { }) }) - describe('walkAll', function () { + describe('walkItems', function () { it(`fails if node doesn't have a pin-set protobuf header`, function (done) { createNode('datum', (err, node) => { expect(err).to.not.exist() - pinSet.walkAll(node, () => {}, () => {}, (err, res) => { + pinSet.walkItems(node, {}, (err, res) => { expect(err).to.exist() expect(res).to.not.exist() done() @@ -199,9 +199,9 @@ describe('pinSet', function () { this.timeout(90 * 1000) const seenPins = [] - const walkerPin = (link, idx, data) => seenPins.push({ link, idx, data }) + const stepPin = (link, idx, data) => seenPins.push({ link, idx, data }) const seenBins = [] - const walkerBin = (link, idx, data) => seenBins.push({ link, idx, data }) + const stepBin = (link, idx, data) => seenBins.push({ link, idx, data }) createNodes(maxItems + 1, (err, nodes) => { expect(err).to.not.exist() @@ -209,7 +209,7 @@ describe('pinSet', function () { pinSet.storeSet(nodes, (err, result) => { expect(err).to.not.exist() - pinSet.walkAll(result.node, walkerPin, walkerBin, err => { + pinSet.walkItems(result.node, { stepPin, stepBin }, err => { expect(err).to.not.exist() expect(seenPins).to.have.length(maxItems + 1) expect(seenBins).to.have.length(defaultFanout) @@ -218,12 +218,10 @@ describe('pinSet', function () { }) }) }) - }) - describe('walkItems', function () { it('visits all non-fanout links of a root node', function (done) { const seen = [] - const walker = (link, idx, data) => seen.push({ link, idx, data }) + const stepPin = (link, idx, data) => seen.push({ link, idx, data }) createNodes(defaultFanout, (err, nodes) => { expect(err).to.not.exist() @@ -231,7 +229,7 @@ describe('pinSet', function () { pinSet.storeSet(nodes, (err, result) => { expect(err).to.not.exist() - pinSet.walkItems(result.node, walker, err => { + pinSet.walkItems(result.node, { stepPin }, err => { expect(err).to.not.exist() expect(seen).to.have.length(defaultFanout) expect(seen[0].idx).to.eql(defaultFanout) From 168046a572fd42bf1cab85431f5b133c9188f05a Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 9 Jul 2019 10:43:04 -0400 Subject: [PATCH 39/56] refactor: import pull modules directly --- src/core/components/pin/gc-lock.js | 8 +++++--- test/core/gc-lock.spec.js | 20 ++++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 2dc540b8fb..59c64e7002 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -2,6 +2,8 @@ const mortice = 
require('mortice') const pull = require('pull-stream') +const pullThrough = require('pull-stream/throughs/through') +const pullAsyncMap = require('pull-stream/throughs/async-map') const EventEmitter = require('events') const log = require('debug')('ipfs:gc:lock') @@ -50,7 +52,7 @@ class GCLock extends EventEmitter { }) const lock = this.mutex[type](locked) - return lock.then(res => cb(null, res)).catch(cb) + return lock.then(res => cb(null, res), cb) } pullReadLock (lockedPullFn) { @@ -104,7 +106,7 @@ class PullLocker { // the locked piece of code take () { return pull( - pull.asyncMap((i, cb) => { + pullAsyncMap((i, cb) => { if (!this.lock) { log(`[${this.lockId}] ${this.type} (pull) requested`) this.emitter.emit(`${this.type} request`, this.lockId) @@ -124,7 +126,7 @@ class PullLocker { // Releases the lock release () { - return pull.through(null, (err) => { + return pullThrough(null, (err) => { log(`[${this.lockId}] ${this.type} (pull) released`) this.emitter.emit(`${this.type} release`, this.lockId) this.releaseLock(err) diff --git a/test/core/gc-lock.spec.js b/test/core/gc-lock.spec.js index 96e9a3c838..d44255b1bd 100644 --- a/test/core/gc-lock.spec.js +++ b/test/core/gc-lock.spec.js @@ -8,6 +8,10 @@ chai.use(dirtyChai) const parallel = require('async/parallel') const pull = require('pull-stream') +const pullThrough = require('pull-stream/throughs/through') +const pullAsyncMap = require('pull-stream/throughs/async-map') +const pullCollect = require('pull-stream/sinks/collect') +const pullValues = require('pull-stream/sources/values') const GCLock = require('../../src/core/components/pin/gc-lock') const cbTakeLock = (type, lock, out, id, duration) => { @@ -49,22 +53,22 @@ const pullTakeLock = (type, lock, out, id, duration) => { const vals = ['a', 'b', 'c'] return (cb) => { pull( - pull.values(vals), + pullValues(vals), lock[lockFn](() => { let started = false return pull( - pull.through((i) => { + pullThrough((i) => { if (!started) { out.push(`${type} ${id} start`) started = true } }), - pull.asyncMap((i, cb) => { + pullAsyncMap((i, cb) => { setTimeout(() => cb(null, i), duration / vals.length) }) ) }), - pull.collect(() => { + pullCollect(() => { out.push(`${type} ${id} end`) cb() }) @@ -82,22 +86,22 @@ const pullTakeLockError = (type, lock, out, errs, id, duration) => { const vals = ['a', 'b', 'c'] return (cb) => { pull( - pull.values(vals), + pullValues(vals), lock[lockFn](() => { let started = false return pull( - pull.through((i) => { + pullThrough((i) => { if (!started) { out.push(`${type} ${id} start`) started = true } }), - pull.asyncMap((i, cb) => { + pullAsyncMap((i, cb) => { setTimeout(() => cb(new Error('err')), duration) }) ) }), - pull.collect((err) => { + pullCollect((err) => { out.push(`${type} ${id} error`) errs.push(err) cb() From 216e53a83e16af641814373584b3356c3ce501d7 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 9 Jul 2019 10:44:49 -0400 Subject: [PATCH 40/56] chore: update mortice package --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ccb1470cd4..6361f8d1f1 100644 --- a/package.json +++ b/package.json @@ -139,7 +139,7 @@ "merge-options": "^1.0.1", "mime-types": "^2.1.21", "mkdirp": "~0.5.1", - "mortice": "dirkmc/mortice#fix/read-then-write", + "mortice": "^1.2.2", "multiaddr": "^6.1.0", "multiaddr-to-uri": "^5.0.0", "multibase": "~0.6.0", From 471217883ed3679ad0a84fe1e248e1d1f0ef6fd8 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 9 Jul 2019 10:53:31 -0400 Subject: [PATCH 
41/56] refactor: use assert.fail() instead of throwing for programmer err --- src/core/components/pin/gc-lock.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 59c64e7002..4cb255de38 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -1,5 +1,6 @@ 'use strict' +const assert = require('assert') const mortice = require('mortice') const pull = require('pull-stream') const pullThrough = require('pull-stream/throughs/through') @@ -32,10 +33,10 @@ class GCLock extends EventEmitter { lock (type, lockedFn, cb) { if (typeof lockedFn !== 'function') { - throw new Error(`first argument to ${type} must be a function`) + assert.fail(`first argument to GCLock.${type} must be a function`) } if (typeof cb !== 'function') { - throw new Error(`second argument to ${type} must be a callback function`) + assert.fail(`second argument to GCLock.${type} must be a callback function`) } const lockId = this.lockId++ From 681b5775b5b49891bac29c3226674856412ade63 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 9 Jul 2019 13:48:35 -0400 Subject: [PATCH 42/56] chore: lint fixes --- src/core/components/pin/gc.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/components/pin/gc.js b/src/core/components/pin/gc.js index 342fb75f91..9ddde77c1a 100644 --- a/src/core/components/pin/gc.js +++ b/src/core/components/pin/gc.js @@ -14,7 +14,7 @@ const MFS_ROOT_DS_KEY = new Key('/local/filesroot') // Perform mark and sweep garbage collection module.exports = function gc (self) { - return promisify(async (callback) => { + return promisify((callback) => { const start = Date.now() log(`Creating set of marked blocks`) From c81a9203e00b717357505bbaca7c8e6b66352f61 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 10 Jul 2019 10:29:59 -0400 Subject: [PATCH 43/56] chore: update ipfs-http-client --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 6361f8d1f1..37729fdf4a 100644 --- a/package.json +++ b/package.json @@ -95,7 +95,7 @@ "ipfs-bitswap": "~0.25.1", "ipfs-block": "~0.8.1", "ipfs-block-service": "~0.15.2", - "ipfs-http-client": "ipfs/js-ipfs-http-client#feat/gc", + "ipfs-http-client": "^33.0.1", "ipfs-http-response": "~0.3.1", "ipfs-mfs": "~0.12.0", "ipfs-multipart": "~0.1.1", From 11a02ddc9c640b7066a63e20c88b11134d56b82e Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 10 Jul 2019 12:01:59 -0400 Subject: [PATCH 44/56] fix: path to gc-lock --- src/core/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/index.js b/src/core/index.js index 89a3d2ae72..3f0958be58 100644 --- a/src/core/index.js +++ b/src/core/index.js @@ -20,7 +20,7 @@ const EventEmitter = require('events') const config = require('./config') const boot = require('./boot') const components = require('./components') -const GCLock = require('./components/gc-lock') +const GCLock = require('./components/pin/gc-lock') // replaced by repo-browser when running in the browser const defaultRepo = require('./runtime/repo-nodejs') From 1e4c97ad71ec6e81b2c68a0538b6ffb59023327b Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 15 Jul 2019 11:19:47 -0400 Subject: [PATCH 45/56] fix: apply review comments --- package.json | 1 + src/cli/commands/repo/gc.js | 4 +- src/core/components/block.js | 2 + .../files-regular/add-pull-stream.js | 2 + src/core/components/pin.js | 2 +- 
src/core/components/pin/gc-lock.js | 14 +++---- src/core/components/pin/gc.js | 42 ++++++++++--------- src/core/components/pin/pin-manager.js | 9 ++-- src/http/api/resources/repo.js | 5 ++- test/core/gc.spec.js | 10 ++--- 10 files changed, 48 insertions(+), 43 deletions(-) diff --git a/package.json b/package.json index 37729fdf4a..850b95ff2e 100644 --- a/package.json +++ b/package.json @@ -83,6 +83,7 @@ "debug": "^4.1.0", "dlv": "^1.1.3", "err-code": "^2.0.0", + "explain-error": "^1.0.4", "file-type": "^12.0.1", "fnv1a": "^1.0.1", "fsm-event": "^2.1.0", diff --git a/src/cli/commands/repo/gc.js b/src/cli/commands/repo/gc.js index d45f4fdd07..fb022f6bf3 100644 --- a/src/cli/commands/repo/gc.js +++ b/src/cli/commands/repo/gc.js @@ -1,7 +1,5 @@ 'use strict' -const { print } = require('../../utils') - module.exports = { command: 'gc', @@ -21,7 +19,7 @@ module.exports = { } }, - handler ({ getIpfs, quiet, streamErrors, resolve }) { + handler ({ getIpfs, print, quiet, streamErrors, resolve }) { resolve((async () => { const ipfs = await getIpfs() const res = await ipfs.repo.gc() diff --git a/src/core/components/block.js b/src/core/components/block.js index 2a7196bbc3..305ad2df3d 100644 --- a/src/core/components/block.js +++ b/src/core/components/block.js @@ -103,6 +103,8 @@ module.exports = function block (self) { return setImmediate(() => callback(errCode(err, 'ERR_INVALID_CID'))) } + // We need to take a write lock here to ensure that adding and removing + // blocks are exclusive operations self._gcLock.writeLock((cb) => self._blockService.delete(cid, cb), callback) }), stat: promisify((cid, options, callback) => { diff --git a/src/core/components/files-regular/add-pull-stream.js b/src/core/components/files-regular/add-pull-stream.js index 47a7ef8517..a81f3a8fe8 100644 --- a/src/core/components/files-regular/add-pull-stream.js +++ b/src/core/components/files-regular/add-pull-stream.js @@ -116,6 +116,8 @@ function pinFile (file, self, opts, cb) { const isRootDir = !file.path.includes('/') const shouldPin = pin && isRootDir && !opts.onlyHash && !opts.hashAlg if (shouldPin) { + // Note: we take a GC lock wherever we call addPullStream(), so tell + // pin.add() not to take a (second) GC lock return self.pin.add(file.hash, { preload: false, lock: false }, err => cb(err, file)) } else { cb(null, file) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index d71aa3d1f6..cfd9035a57 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -20,7 +20,7 @@ function toB58String (hash) { module.exports = (self) => { const dag = self.dag - const pinManager = new PinManager(self._repo, dag, self.log) + const pinManager = new PinManager(self._repo, dag) const pin = { add: promisify((paths, options, callback) => { diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 4cb255de38..30e3a9deb1 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -2,7 +2,7 @@ const assert = require('assert') const mortice = require('mortice') -const pull = require('pull-stream') +const pull = require('pull-stream/pull') const pullThrough = require('pull-stream/throughs/through') const pullAsyncMap = require('pull-stream/throughs/async-map') const EventEmitter = require('events') @@ -32,12 +32,8 @@ class GCLock extends EventEmitter { } lock (type, lockedFn, cb) { - if (typeof lockedFn !== 'function') { - assert.fail(`first argument to GCLock.${type} must be a function`) - } - if (typeof cb !== 'function') { - 
assert.fail(`second argument to GCLock.${type} must be a callback function`) - } + assert(typeof lockedFn === 'function', `first argument to GCLock.${type} must be a function`) + assert(typeof cb === 'function', `second argument to GCLock.${type} must be a callback function`) const lockId = this.lockId++ log(`[${lockId}] ${type} requested`) @@ -90,7 +86,7 @@ class PullLocker { } // Returns a Promise that resolves when the locked piece of code completes - locked () { + _locked () { return new Promise((resolve, reject) => { this.releaseLock = (err) => err ? reject(err) : resolve() @@ -112,7 +108,7 @@ class PullLocker { log(`[${this.lockId}] ${this.type} (pull) requested`) this.emitter.emit(`${this.type} request`, this.lockId) // Request the lock - this.lock = this.mutex[this.type](() => this.locked()) + this.lock = this.mutex[this.type](() => this._locked()) // If there is an error, it gets passed through to the caller using // pull streams, so here we just catch the error and ignore it so // that there isn't an UnhandledPromiseRejectionWarning diff --git a/src/core/components/pin/gc.js b/src/core/components/pin/gc.js index 9ddde77c1a..57a1dcbc00 100644 --- a/src/core/components/pin/gc.js +++ b/src/core/components/pin/gc.js @@ -6,6 +6,8 @@ const base32 = require('base32.js') const parallel = require('async/parallel') const mapLimit = require('async/mapLimit') const { Key } = require('interface-datastore') +const expErr = require('explain-error') +const { cidToString } = require('../../../utils/cid') const log = require('debug')('ipfs:gc') // Limit on the number of parallel block remove operations @@ -16,7 +18,7 @@ const MFS_ROOT_DS_KEY = new Key('/local/filesroot') module.exports = function gc (self) { return promisify((callback) => { const start = Date.now() - log(`Creating set of marked blocks`) + log('Creating set of marked blocks') self._gcLock.writeLock((lockCb) => { parallel([ @@ -24,16 +26,18 @@ module.exports = function gc (self) { (cb) => self._repo.blocks.query({ keysOnly: true }, cb), // Mark all blocks that are being used (cb) => createMarkedSet(self, cb) - ], (err, [blocks, markedSet]) => { + ], (err, [blockKeys, markedSet]) => { if (err) { - log(`Error - ${err.message}`) + log('GC failed to fetch all block keys and created marked set', err) return lockCb(err) } // Delete blocks that are not being used - deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { + deleteUnmarkedBlocks(self, markedSet, blockKeys, (err, res) => { + log(`Complete (${Date.now() - start}ms)`) + if (err) { - log(`Error - ${err.message}`) + log('GC failed to delete unmarked blocks', err) return lockCb(err) } lockCb(null, res) @@ -49,7 +53,7 @@ function createMarkedSet (ipfs, callback) { // All pins, direct and indirect (cb) => ipfs.pin.ls((err, pins) => { if (err) { - return cb(new Error(`Could not list pinned blocks: ${err.message}`)) + return cb(expErr(err, 'Could not list pinned blocks')) } log(`Found ${pins.length} pinned blocks`) const cids = pins.map(p => new CID(p.hash)) @@ -60,7 +64,7 @@ function createMarkedSet (ipfs, callback) { // Blocks used internally by the pinner (cb) => ipfs.pin._getInternalBlocks((err, cids) => { if (err) { - return cb(new Error(`Could not list pinner internal blocks: ${err.message}`)) + return cb(expErr(err, 'Could not list pinner internal blocks')) } log(`Found ${cids.length} pinner internal blocks`) // log(' ' + cids.join('\n ')) @@ -74,7 +78,7 @@ function createMarkedSet (ipfs, callback) { log(`No blocks in MFS`) return cb(null, []) } - return cb(new 
Error(`Could not get MFS root from datastore: ${err.message}`)) + return cb(expErr(err, 'Could not get MFS root from datastore')) } getDescendants(ipfs, new CID(mh), cb) @@ -84,7 +88,7 @@ function createMarkedSet (ipfs, callback) { return callback(err) } - const cids = [].concat(...res).map(cid => cid.toV1().toString('base32')) + const cids = [].concat(...res).map(cid => cidToString(cid, { base: 'base32' })) return callback(null, new Set(cids)) }) } @@ -93,7 +97,7 @@ function createMarkedSet (ipfs, callback) { function getDescendants (ipfs, cid, callback) { ipfs.refs(cid, { recursive: true }, (err, refs) => { if (err) { - return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) + return callback(expErr(err, 'Could not get MFS root descendants from store')) } const cids = [cid, ...refs.map(r => new CID(r.ref))] log(`Found ${cids.length} MFS blocks`) @@ -103,13 +107,13 @@ function getDescendants (ipfs, cid, callback) { } // Delete all blocks that are not marked as in use -function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { +function deleteUnmarkedBlocks (ipfs, markedSet, blockKeys, callback) { // Iterate through all blocks and find those that are not in the marked set - // The blocks variable has the form { { key: Key() }, { key: Key() }, ... } + // The blockKeys variable has the form [ { key: Key() }, { key: Key() }, ... ] const unreferenced = [] const res = [] let errCount = 0 - for (const { key: k } of blocks) { + for (const { key: k } of blockKeys) { try { const cid = dsKeyToCid(k) const b32 = cid.toV1().toString('base32') @@ -118,13 +122,13 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { } } catch (err) { errCount++ - const msg = `Could not convert block with key '${k}' to CID: ${err.message}` - log(msg) - res.push({ err: new Error(msg) }) + const msg = `Could not convert block with key '${k}' to CID` + log(msg, err) + res.push({ err: new Error(msg + `: ${err.message}`) }) } } - const msg = `Marked set has ${markedSet.size} unique blocks. Blockstore has ${blocks.length} blocks. ` + + const msg = `Marked set has ${markedSet.size} unique blocks. Blockstore has ${blockKeys.length} blocks. ` + `Deleting ${unreferenced.length} blocks.` + (errCount ? 
` (${errCount} errors)` : '') log(msg) // log(' ' + unreferenced.join('\n ')) @@ -133,14 +137,12 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { // Delete blocks from blockstore ipfs._repo.blocks.delete(cid, (err) => { const res = { - cid: cid.toString(), + cid, err: err && new Error(`Could not delete block with CID ${cid}: ${err.message}`) } cb(null, res) }) }, (_, delRes) => { - log(`Complete (${Date.now() - start}ms)`) - callback(null, res.concat(delRes)) }) } diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js index cecad5f37d..f1ae8140e2 100644 --- a/src/core/components/pin/pin-manager.js +++ b/src/core/components/pin/pin-manager.js @@ -12,6 +12,7 @@ const detectLimit = require('async/detectLimit') const { Key } = require('interface-datastore') const errCode = require('err-code') const multicodec = require('multicodec') +const debug = require('debug') const createPinSet = require('./pin-set') @@ -36,21 +37,21 @@ const PinTypes = { } class PinManager { - constructor (repo, dag, log) { + constructor (repo, dag) { this.repo = repo this.dag = dag - this.log = log + this.log = debug('ipfs:pin') this.pinset = createPinSet(dag) this.directPins = new Set() this.recursivePins = new Set() } directKeys () { - return Array.from(this.directPins).map(key => new CID(key).buffer) + return Array.from(this.directPins, key => new CID(key).buffer) } recursiveKeys () { - return Array.from(this.recursivePins).map(key => new CID(key).buffer) + return Array.from(this.recursivePins, key => new CID(key).buffer) } getIndirectKeys (callback) { diff --git a/src/http/api/resources/repo.js b/src/http/api/resources/repo.js index ce59dc3123..108ce6abda 100644 --- a/src/http/api/resources/repo.js +++ b/src/http/api/resources/repo.js @@ -16,7 +16,10 @@ exports.gc = { const filtered = res.filter(r => !r.err || streamErrors) const response = filtered.map(r => { - return { Err: r.err, Key: !r.err && { '/': r.cid } } + return { + Err: r.err && r.err.message, + Key: !r.err && { '/': r.cid.toString() } + } }) return h.response(response) } diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index cf9593c6a6..92eae9ae28 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -86,7 +86,7 @@ describe('gc', function () { await gcStarted const add2 = test.add2() - const deleted = (await gc).map(i => i.cid) + const deleted = (await gc).map(i => i.cid.toString()) const add1Res = test.resToCid(await add1) const add2Res = test.resToCid(await add2) @@ -116,7 +116,7 @@ describe('gc', function () { await gcStarted const add2 = ipfs.add(fixtures[3], { pin: true }) - const deleted = (await gc).map(i => i.cid) + const deleted = (await gc).map(i => i.cid.toString()) const add1Res = (await add1)[0].hash const add2Res = (await add2)[0].hash @@ -149,7 +149,7 @@ describe('gc', function () { await gcStarted const rm2 = ipfs.block.rm(cid2) - const deleted = (await gc).map(i => i.cid) + const deleted = (await gc).map(i => i.cid.toString()) await rm1 // Second rm should fail because GC has already removed that block @@ -185,7 +185,7 @@ describe('gc', function () { // Once pin lock has been requested, start GC await pinLockRequested const gc = ipfs.repo.gc() - const deleted = (await gc).map(i => i.cid) + const deleted = (await gc).map(i => i.cid.toString()) await pin1 // TODO: Adding pin for removed block never returns, which means the lock @@ -229,7 +229,7 @@ describe('gc', function () { await gcStarted const pinRm2 = ipfs.pin.rm(cid2) - const deleted = (await 
gc).map(i => i.cid) + const deleted = (await gc).map(i => i.cid.toString()) await pinRm1 await pinRm2 From a8c362ca62758c47f83f557f1c867579127b5520 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 17 Jul 2019 12:48:52 -0400 Subject: [PATCH 46/56] chore: address review comments --- package.json | 1 + src/core/components/pin/gc-lock.js | 3 ++- src/core/components/pin/gc.js | 2 +- test/core/gc.spec.js | 32 ++++++++++++++++++------------ 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/package.json b/package.json index 850b95ff2e..152807d1d7 100644 --- a/package.json +++ b/package.json @@ -147,6 +147,7 @@ "multicodec": "~0.5.5", "multihashes": "~0.4.14", "multihashing-async": "~0.6.0", + "nanoid": "^2.0.3", "node-fetch": "^2.3.0", "p-event": "^4.1.0", "peer-book": "~0.9.0", diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 30e3a9deb1..4e724ade89 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -2,6 +2,7 @@ const assert = require('assert') const mortice = require('mortice') +const nanoid = require('nanoid') const pull = require('pull-stream/pull') const pullThrough = require('pull-stream/throughs/through') const pullAsyncMap = require('pull-stream/throughs/async-map') @@ -15,7 +16,7 @@ class GCLock extends EventEmitter { // Ensure that we get a different mutex for each instance of GCLock // (There should only be one GCLock instance per IPFS instance, but // there may be multiple IPFS instances, eg in unit tests) - const randId = (~~(Math.random() * 1e9)).toString(36) + Date.now() + const randId = nanoid() this.mutex = mortice(randId, { singleProcess: repoOwner }) diff --git a/src/core/components/pin/gc.js b/src/core/components/pin/gc.js index 57a1dcbc00..f8507722e7 100644 --- a/src/core/components/pin/gc.js +++ b/src/core/components/pin/gc.js @@ -22,7 +22,7 @@ module.exports = function gc (self) { self._gcLock.writeLock((lockCb) => { parallel([ - // Get all blocks from the blockstore + // Get all blocks keys from the blockstore (cb) => self._repo.blocks.query({ keysOnly: true }, cb), // Mark all blocks that are being used (cb) => createMarkedSet(self, cb) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 92eae9ae28..4aaece6e49 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -7,10 +7,10 @@ const dirtyChai = require('dirty-chai') const expect = chai.expect chai.use(dirtyChai) -const isNode = require('detect-node') +const IPFSFactory = require('ipfsd-ctl') const pEvent = require('p-event') +const env = require('ipfs-utils/src/env') const IPFS = require('../../src/core') -const createTempRepo = require('../utils/create-repo-nodejs') describe('gc', function () { const fixtures = [{ @@ -27,28 +27,34 @@ describe('gc', function () { content: Buffer.from('path4') }] + let ipfsd let ipfs - let repo before(function (done) { - this.timeout(20 * 1000) - repo = createTempRepo() + this.timeout(40 * 1000) + + const factory = IPFSFactory.create({ type: 'proc', exec: IPFS }) + let config = { Bootstrap: [] } - if (isNode) { + if (env.isNode) { config.Addresses = { Swarm: ['/ip4/127.0.0.1/tcp/0'] } } - ipfs = new IPFS({ repo, config }) - ipfs.on('ready', done) - }) - after(function (done) { - this.timeout(60 * 1000) - ipfs.stop(done) + factory.spawn({ config }, (err, node) => { + expect(err).to.not.exist() + + ipfsd = node + ipfs = ipfsd.api + + done() + }) }) - after((done) => repo.teardown(done)) + after((done) => { + ipfsd.stop(done) + }) const blockAddTests = [{ name: 'add', 
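Taken together, the locking discipline in these changes is: operations that
add or remove individual blocks and pins take the GC read lock (so they may
interleave with one another), while garbage collection takes the write lock
and runs exclusively. A minimal usage sketch, assuming the callback-style
GCLock API exercised in the tests above (the require path and error handling
are illustrative, not part of the patches):

    'use strict'

    // Illustrative path; matches the layout introduced in this series
    const GCLock = require('./src/core/components/pin/gc-lock')

    // repoOwner=true keeps the underlying mortice mutex in-process
    const lock = new GCLock(true)

    // add/pin-style operations: read lock, may run concurrently
    lock.readLock((release) => {
      // ... write blocks, update pin sets ...
      release() // or release(err) to propagate a failure
    }, (err) => {
      if (err) console.error('add failed', err)
    })

    // gc: write lock, waits for in-flight readers and excludes new ones
    lock.writeLock((release) => {
      // ... mark and sweep unreferenced blocks ...
      release()
    }, (err) => {
      if (err) console.error('gc failed', err)
    })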
From 43a264418d47c2c7aa623214626b7e269e8db3ed Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 18 Jul 2019 10:01:21 -0400 Subject: [PATCH 47/56] refactor: simplify gc-lock --- src/core/components/pin/gc-lock.js | 89 ++++++------------------------ src/utils/mutex.js | 72 ++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 71 deletions(-) create mode 100644 src/utils/mutex.js diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 4e724ade89..e454f7dc09 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -1,56 +1,27 @@ 'use strict' -const assert = require('assert') -const mortice = require('mortice') -const nanoid = require('nanoid') const pull = require('pull-stream/pull') const pullThrough = require('pull-stream/throughs/through') const pullAsyncMap = require('pull-stream/throughs/async-map') const EventEmitter = require('events') +const Mutex = require('../../../utils/mutex') const log = require('debug')('ipfs:gc:lock') class GCLock extends EventEmitter { constructor (repoOwner) { super() - // Ensure that we get a different mutex for each instance of GCLock - // (There should only be one GCLock instance per IPFS instance, but - // there may be multiple IPFS instances, eg in unit tests) - const randId = nanoid() - this.mutex = mortice(randId, { - singleProcess: repoOwner - }) - - this.lockId = 0 + this.mutex = new Mutex(repoOwner, { log }) } readLock (lockedFn, cb) { - return this.lock('readLock', lockedFn, cb) + this.emit(`readLock request`) + return this.mutex.readLock(lockedFn, cb) } writeLock (lockedFn, cb) { - return this.lock('writeLock', lockedFn, cb) - } - - lock (type, lockedFn, cb) { - assert(typeof lockedFn === 'function', `first argument to GCLock.${type} must be a function`) - assert(typeof cb === 'function', `second argument to GCLock.${type} must be a callback function`) - - const lockId = this.lockId++ - log(`[${lockId}] ${type} requested`) - this.emit(`${type} request`, lockId) - const locked = () => new Promise((resolve, reject) => { - this.emit(`${type} start`, lockId) - log(`[${lockId}] ${type} started`) - lockedFn((err, res) => { - this.emit(`${type} release`, lockId) - log(`[${lockId}] ${type} released`) - err ? reject(err) : resolve(res) - }) - }) - - const lock = this.mutex[type](locked) - return lock.then(res => cb(null, res), cb) + this.emit(`writeLock request`) + return this.mutex.writeLock(lockedFn, cb) } pullReadLock (lockedPullFn) { @@ -73,51 +44,29 @@ class GCLock extends EventEmitter { } class PullLocker { - constructor (emitter, mutex, type, lockId) { + constructor (emitter, mutex, type) { this.emitter = emitter this.mutex = mutex this.type = type - this.lockId = lockId - // This Promise resolves when the mutex gives us permission to start - // running the locked piece of code - this.lockReady = new Promise((resolve) => { - this.lockReadyResolver = resolve - }) + // The function to call to release the lock. It is set when the lock is taken + this.releaseLock = null } - // Returns a Promise that resolves when the locked piece of code completes - _locked () { - return new Promise((resolve, reject) => { - this.releaseLock = (err) => err ? 
reject(err) : resolve() - - log(`[${this.lockId}] ${this.type} (pull) started`) - this.emitter.emit(`${this.type} start`, this.lockId) - - // The locked piece of code is ready to start, so resolve the - // this.lockReady Promise (created in the constructor) - this.lockReadyResolver() - }) - } - - // Requests a lock and then waits for the mutex to give us permission to run - // the locked piece of code take () { return pull( pullAsyncMap((i, cb) => { - if (!this.lock) { - log(`[${this.lockId}] ${this.type} (pull) requested`) - this.emitter.emit(`${this.type} request`, this.lockId) - // Request the lock - this.lock = this.mutex[this.type](() => this._locked()) - // If there is an error, it gets passed through to the caller using - // pull streams, so here we just catch the error and ignore it so - // that there isn't an UnhandledPromiseRejectionWarning - this.lock.catch(() => {}) + if (this.lockRequested) { + return cb(null, i) } + this.lockRequested = true + + this.emitter.emit(`${this.type} request`) - // Wait for the mutex to give us permission - this.lockReady.then(() => cb(null, i)) + this.mutex[this.type]((releaseLock) => { + cb(null, i) + this.releaseLock = releaseLock + }) }) ) } @@ -125,8 +74,6 @@ class PullLocker { // Releases the lock release () { return pullThrough(null, (err) => { - log(`[${this.lockId}] ${this.type} (pull) released`) - this.emitter.emit(`${this.type} release`, this.lockId) this.releaseLock(err) }) } diff --git a/src/utils/mutex.js b/src/utils/mutex.js new file mode 100644 index 0000000000..3175caa628 --- /dev/null +++ b/src/utils/mutex.js @@ -0,0 +1,72 @@ +'use strict' + +const assert = require('assert') +const mortice = require('mortice') +const nanoid = require('nanoid') +const setImmediate = require('async/setImmediate') +const noop = () => {} + +// Wrap mortice to present a callback interface +class Mutex { + constructor (repoOwner, options) { + // Ensure that we get a different mutex for each instance of the lock + const randId = nanoid() + this.mutex = mortice(randId, { + singleProcess: repoOwner + }) + + this.log = options.log || noop + this.lockId = 0 + } + + readLock (lockedFn, cb) { + return this._lock('readLock', lockedFn, cb) + } + + writeLock (lockedFn, cb) { + return this._lock('writeLock', lockedFn, cb) + } + + /** + * Request a read or write lock + * + * @param {String} type The type of lock: readLock / writeLock + * @param {function(releaseLock)} lockedFn A function that runs the locked piece of code and calls releaseLock when it completes + * @param {function(err, res)} [cb] A function that is called when the locked function completes + * @returns {void} + */ + _lock (type, lockedFn, cb) { + assert(typeof lockedFn === 'function', `first argument to CBLock.${type}() must be a function`) + + if (typeof cb === 'undefined') { + cb = noop + } + assert(typeof cb === 'function', `second argument to CBLock.${type}() must be a callback function`) + + const lockId = this.lockId++ + this.log(`[${lockId}] ${type} requested`) + + // mortice presents a promise based API, so we need to give it a function + // that returns a Promise. + // The function is invoked when mortice gives permission to run the locked + // piece of code + const locked = () => new Promise((resolve, reject) => { + this.log(`[${lockId}] ${type} started`) + lockedFn((err, res) => { + this.log(`[${lockId}] ${type} released`) + err ? 
reject(err) : resolve(res) + }) + }) + + // Get a Promise for the lock + const lock = this.mutex[type](locked) + + // When the locked piece of code is complete, the Promise resolves + return lock.then( + (res) => setImmediate(() => cb(null, res)), + (err) => setImmediate(() => cb(err)) + ) + } +} + +module.exports = Mutex From c992c51e4db6d51e2a6d3355cda5a6b689aea2da Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 18 Jul 2019 10:53:31 -0400 Subject: [PATCH 48/56] refactor: move EventEmitter from GCLock to test --- src/core/components/pin/gc-lock.js | 44 ++++++++++++++-------------- src/utils/mutex.js | 2 +- test/core/gc.spec.js | 46 ++++++++++++++++++++++++------ 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index e454f7dc09..c3395c8c2a 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -3,24 +3,19 @@ const pull = require('pull-stream/pull') const pullThrough = require('pull-stream/throughs/through') const pullAsyncMap = require('pull-stream/throughs/async-map') -const EventEmitter = require('events') const Mutex = require('../../../utils/mutex') const log = require('debug')('ipfs:gc:lock') -class GCLock extends EventEmitter { +class GCLock { constructor (repoOwner) { - super() - this.mutex = new Mutex(repoOwner, { log }) } readLock (lockedFn, cb) { - this.emit(`readLock request`) return this.mutex.readLock(lockedFn, cb) } writeLock (lockedFn, cb) { - this.emit(`writeLock request`) return this.mutex.writeLock(lockedFn, cb) } @@ -33,7 +28,7 @@ class GCLock extends EventEmitter { } pullLock (type, lockedPullFn) { - const pullLocker = new PullLocker(this, this.mutex, type, this.lockId++) + const pullLocker = new PullLocker(this.mutex, type) return pull( pullLocker.take(), @@ -44,8 +39,7 @@ class GCLock extends EventEmitter { } class PullLocker { - constructor (emitter, mutex, type) { - this.emitter = emitter + constructor (mutex, type) { this.mutex = mutex this.type = type @@ -54,26 +48,30 @@ class PullLocker { } take () { - return pull( - pullAsyncMap((i, cb) => { - if (this.lockRequested) { - return cb(null, i) - } - this.lockRequested = true - - this.emitter.emit(`${this.type} request`) - - this.mutex[this.type]((releaseLock) => { - cb(null, i) - this.releaseLock = releaseLock - }) + return pullAsyncMap((i, cb) => { + // Check if the lock has already been acquired. + // Note: new items will only come through the pull stream once the first + // item has acquired a lock. 
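+        // (releaseLock doubles as the "lock held" flag: it is only assigned
+        // once the mutex grants the lock, so a truthy check here means this
+        // stream has already acquired the lock)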
+ if (this.releaseLock) { + // The lock has been acquired so return immediately + return cb(null, i) + } + + // Request the lock + this.mutex[this.type]((releaseLock) => { + // The lock has been granted, so run the locked piece of code + cb(null, i) + + // Save the release function to be called when the stream completes + this.releaseLock = releaseLock }) - ) + }) } // Releases the lock release () { return pullThrough(null, (err) => { + // When the stream completes, release the lock this.releaseLock(err) }) } diff --git a/src/utils/mutex.js b/src/utils/mutex.js index 3175caa628..f423feecef 100644 --- a/src/utils/mutex.js +++ b/src/utils/mutex.js @@ -8,7 +8,7 @@ const noop = () => {} // Wrap mortice to present a callback interface class Mutex { - constructor (repoOwner, options) { + constructor (repoOwner, options = {}) { // Ensure that we get a different mutex for each instance of the lock const randId = nanoid() this.mutex = mortice(randId, { diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 4aaece6e49..14c34c7141 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -12,6 +12,28 @@ const pEvent = require('p-event') const env = require('ipfs-utils/src/env') const IPFS = require('../../src/core') +// We need to detect when a readLock or writeLock is requested for the tests +// so we override the Mutex class to emit an event +const EventEmitter = require('events') +const Mutex = require('../../src/utils/mutex') + +class MutexEmitter extends Mutex { + constructor (repoOwner) { + super(repoOwner) + this.emitter = new EventEmitter() + } + + readLock (lockedFn, cb) { + this.emitter.emit('readLock request') + return super.readLock(lockedFn, cb) + } + + writeLock (lockedFn, cb) { + this.emitter.emit('writeLock request') + return super.writeLock(lockedFn, cb) + } +} + describe('gc', function () { const fixtures = [{ path: 'test/my/path1', @@ -29,6 +51,7 @@ describe('gc', function () { let ipfsd let ipfs + let lockEmitter before(function (done) { this.timeout(40 * 1000) @@ -48,6 +71,11 @@ describe('gc', function () { ipfsd = node ipfs = ipfsd.api + // Replace the Mutex with one that emits events when a readLock or + // writeLock is requested (needed in the tests below) + ipfs._gcLock.mutex = new MutexEmitter(ipfs._options.repoOwner) + lockEmitter = ipfs._gcLock.mutex.emitter + done() }) }) @@ -79,13 +107,13 @@ describe('gc', function () { it(`garbage collection should wait for pending ${test.name} to finish`, async () => { // Add blocks to IPFS // Note: add operation will take a read lock - const addLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const addLockRequested = pEvent(lockEmitter, 'readLock request') const add1 = test.add1() // Once add lock has been requested, start GC await addLockRequested // Note: GC will take a write lock - const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gcStarted = pEvent(lockEmitter, 'writeLock request') const gc = ipfs.repo.gc() // Once GC has started, start second add @@ -109,13 +137,13 @@ describe('gc', function () { it('garbage collection should wait for pending add + pin to finish', async () => { // Add blocks to IPFS // Note: add operation will take a read lock - const addLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const addLockRequested = pEvent(lockEmitter, 'readLock request') const add1 = ipfs.add(fixtures[2], { pin: true }) // Once add lock has been requested, start GC await addLockRequested // Note: GC will take a write lock - const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') 
+ const gcStarted = pEvent(lockEmitter, 'writeLock request') const gc = ipfs.repo.gc() // Once GC has started, start second add @@ -142,13 +170,13 @@ // Remove first block from IPFS // Note: block rm will take a write lock - const rmLockRequested = pEvent(ipfs._gcLock, 'writeLock request') + const rmLockRequested = pEvent(lockEmitter, 'writeLock request') const rm1 = ipfs.block.rm(cid1) // Once rm lock has been requested, start GC await rmLockRequested // Note: GC will take a write lock - const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gcStarted = pEvent(lockEmitter, 'writeLock request') const gc = ipfs.repo.gc() // Once GC has started, start second rm @@ -185,7 +213,7 @@ // Pin first block // Note: pin add will take a read lock - const pinLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const pinLockRequested = pEvent(lockEmitter, 'readLock request') const pin1 = ipfs.pin.add(cid1) // Once pin lock has been requested, start GC @@ -222,13 +250,13 @@ // Unpin first block // Note: pin rm will take a read lock - const pinLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const pinLockRequested = pEvent(lockEmitter, 'readLock request') const pinRm1 = ipfs.pin.rm(cid1) // Once pin lock has been requested, start GC await pinLockRequested // Note: GC will take a write lock - const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gcStarted = pEvent(lockEmitter, 'writeLock request') const gc = ipfs.repo.gc() // Once GC has started, start second pin rm From c364b19d0dfea9f9ebf100b90c2a99c0fed16109 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 18 Jul 2019 10:55:49 -0400 Subject: [PATCH 49/56] refactor: better default args handling in Mutex --- src/utils/mutex.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/utils/mutex.js b/src/utils/mutex.js index f423feecef..ea0a27fb95 100644 --- a/src/utils/mutex.js +++ b/src/utils/mutex.js @@ -35,12 +35,8 @@ class Mutex { * @param {function(err, res)} [cb] A function that is called when the locked function completes * @returns {void} */ - _lock (type, lockedFn, cb) { + _lock (type, lockedFn, cb = noop) { assert(typeof lockedFn === 'function', `first argument to CBLock.${type}() must be a function`) - - if (typeof cb === 'undefined') { - cb = noop - } assert(typeof cb === 'function', `second argument to CBLock.${type}() must be a callback function`) const lockId = this.lockId++ From 358b957a7621e65a372b0a1019c524aea6db22d0 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 19 Jul 2019 09:33:06 -0400 Subject: [PATCH 50/56] fix: lint fixes --- src/core/components/pin/pin-manager.js | 2 +- test/core/gc.spec.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js index f1ae8140e2..848e5c1551 100644 --- a/src/core/components/pin/pin-manager.js +++ b/src/core/components/pin/pin-manager.js @@ -195,7 +195,7 @@ class PinManager { cb => this.pinset.loadSet(pinRoot.value, PinTypes.direct, cb) ], (err, keys) => { if (err) { return callback(err) } - const [ rKeys, dKeys ] = keys + const [rKeys, dKeys] = keys this.directPins = new Set(dKeys.map(toB58String)) this.recursivePins = new Set(rKeys.map(toB58String)) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 14c34c7141..9ada465810 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -58,7 +58,7 @@ describe('gc', function () {
const factory = IPFSFactory.create({ type: 'proc', exec: IPFS }) - let config = { Bootstrap: [] } + const config = { Bootstrap: [] } if (env.isNode) { config.Addresses = { Swarm: ['/ip4/127.0.0.1/tcp/0'] From bcc8a69f875222312494917f7a7d350810288e7f Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 19 Jul 2019 09:53:45 -0400 Subject: [PATCH 51/56] fix: use repo path as mortice id --- package.json | 1 - src/core/components/pin/gc-lock.js | 4 ++-- src/core/index.js | 6 +++++- src/utils/mutex.js | 5 +---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index 152807d1d7..850b95ff2e 100644 --- a/package.json +++ b/package.json @@ -147,7 +147,6 @@ "multicodec": "~0.5.5", "multihashes": "~0.4.14", "multihashing-async": "~0.6.0", - "nanoid": "^2.0.3", "node-fetch": "^2.3.0", "p-event": "^4.1.0", "peer-book": "~0.9.0", diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index c3395c8c2a..082b7a687e 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -7,8 +7,8 @@ const Mutex = require('../../../utils/mutex') const log = require('debug')('ipfs:gc:lock') class GCLock { - constructor (repoOwner) { - this.mutex = new Mutex(repoOwner, { log }) + constructor (repoOwner, options = {}) { + this.mutex = new Mutex(repoOwner, { ...options, log }) } readLock (lockedFn, cb) { diff --git a/src/core/index.js b/src/core/index.js index 3f0958be58..8b0b717716 100644 --- a/src/core/index.js +++ b/src/core/index.js @@ -91,7 +91,11 @@ class IPFS extends EventEmitter { this._ipns = undefined // eslint-disable-next-line no-console this._print = this._options.silent ? this.log : console.log - this._gcLock = new GCLock(this._options.repoOwner) + this._gcLock = new GCLock(this._options.repoOwner, { + // Make sure GCLock is specific to repo, for tests where there are + // multiple instances of IPFS + morticeId: this._repo.path + }) // IPFS Core exposed components // - for booting up a node diff --git a/src/utils/mutex.js b/src/utils/mutex.js index ea0a27fb95..0b4692cbe1 100644 --- a/src/utils/mutex.js +++ b/src/utils/mutex.js @@ -2,16 +2,13 @@ const assert = require('assert') const mortice = require('mortice') -const nanoid = require('nanoid') const setImmediate = require('async/setImmediate') const noop = () => {} // Wrap mortice to present a callback interface class Mutex { constructor (repoOwner, options = {}) { - // Ensure that we get a different mutex for each instance of the lock - const randId = nanoid() - this.mutex = mortice(randId, { + this.mutex = mortice(options.morticeId, { singleProcess: repoOwner }) From 24c072e3866f404d71a0acbe94fe843e43c23c6c Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 22 Jul 2019 12:02:04 -0400 Subject: [PATCH 52/56] test: add repo gc cli tests --- src/cli/commands/repo/gc.js | 6 ++-- test/cli/gc.js | 66 +++++++++++++++++++++++++++++++++++++ test/cli/repo.js | 28 ---------------- test/core/gc.spec.js | 2 +- 4 files changed, 70 insertions(+), 32 deletions(-) create mode 100644 test/cli/gc.js diff --git a/src/cli/commands/repo/gc.js b/src/cli/commands/repo/gc.js index fb022f6bf3..b805ac9934 100644 --- a/src/cli/commands/repo/gc.js +++ b/src/cli/commands/repo/gc.js @@ -15,7 +15,7 @@ module.exports = { 'stream-errors': { desc: 'Output individual errors thrown when deleting blocks.', type: 'boolean', - default: false + default: true } }, @@ -25,9 +25,9 @@ module.exports = { const res = await ipfs.repo.gc() for (const r of res) { if (r.err) { - streamErrors && 
print(r.err, true, true) + streamErrors && print(r.err.message, true, true) } else { - print((quiet ? '' : 'Removed ') + r.cid) + print((quiet ? '' : 'removed ') + r.cid) } } })()) diff --git a/test/cli/gc.js b/test/cli/gc.js new file mode 100644 index 0000000000..d66c82f56d --- /dev/null +++ b/test/cli/gc.js @@ -0,0 +1,66 @@ +/* eslint-env mocha */ +'use strict' + +const sinon = require('sinon') +const YargsPromise = require('yargs-promise') +const CID = require('cids') +const cliUtils = require('../../src/cli/utils') +const cli = new YargsPromise(require('../../src/cli/parser')) + +describe('gc', () => { + const setupMocks = (cids, errMsg) => { + let gcRes = cids.map(h => ({ cid: new CID(h) })) + if (errMsg) { + gcRes = gcRes.concat([{ err: new Error(errMsg) }]) + } + + const gcFake = sinon.fake.returns(gcRes) + sinon + .stub(cliUtils, 'getIPFS') + .callsArgWith(1, null, { repo: { gc: gcFake } }) + + return sinon.stub(cliUtils, 'print') + } + + afterEach(() => { + sinon.restore() + }) + + it('gc with no flags prints errors and outputs hashes', async () => { + const cids = [ + 'Qmd286K6pohQcTKYqnS1YhWrCiS4gz7Xi34sdwMe9USZ7u', + 'QmVc6zuAneKJzicnJpfrqCH9gSy6bz54JhcypfJYhGUFQu' + ] + const errMsg = 'some err' + const printSpy = setupMocks(cids, errMsg) + + await cli.parse(`repo gc`) + + const exp = cids.map(c => 'removed ' + c).concat(errMsg) + for (let i = 0; i < exp.length; i++) { + sinon.assert.calledWith(printSpy.getCall(i), exp[i]) + } + }) + + it('gc with --quiet prints hashes only', async () => { + const cids = [ + 'Qmd286K6pohQcTKYqnS1YhWrCiS4gz7Xi34sdwMe9USZ7u', + 'QmVc6zuAneKJzicnJpfrqCH9gSy6bz54JhcypfJYhGUFQu' + ] + const printSpy = setupMocks(cids) + + await cli.parse(`repo gc --quiet`) + + const exp = cids.map(c => c.toString()) + for (let i = 0; i < exp.length; i++) { + sinon.assert.calledWith(printSpy.getCall(i), exp[i]) + } + }) + + it('gc with --stream-errors=false does not print errors', async () => { + const printSpy = setupMocks([], 'some err') + + await cli.parse(`repo gc --stream-errors=false`) + sinon.assert.notCalled(printSpy) + }) +}) diff --git a/test/cli/repo.js b/test/cli/repo.js index b49c6ea396..3c83d2d468 100644 --- a/test/cli/repo.js +++ b/test/cli/repo.js @@ -5,8 +5,6 @@ const expect = require('chai').expect const repoVersion = require('ipfs-repo').repoVersion const runOnAndOff = require('../utils/on-and-off') -const fixturePath = 'test/fixtures/planets/solar-system.md' - describe('repo', () => runOnAndOff((thing) => { let ipfs @@ -19,30 +17,4 @@ describe('repo', () => runOnAndOff((thing) => { expect(out).to.eql(`${repoVersion}\n`) }) }) - - // Note: There are more comprehensive GC tests in interface-js-ipfs-core - it('should run garbage collection', async function () { - this.timeout(60000) - - // Add a file to IPFS - const cid = (await ipfs(`add -Q ${fixturePath}`)).trim() - - // File hash should be in refs local - const localRefs = await ipfs('refs local') - expect(localRefs.split('\n')).includes(cid) - - // Run GC, file should not have been removed because it's pinned - const gcOut = await ipfs('repo gc') - expect(gcOut.split('\n')).not.includes('Removed ' + cid) - - // Unpin file - await ipfs('pin rm ' + cid) - - // Run GC, file should now be removed - const gcOutAfterUnpin = await ipfs('repo gc') - expect(gcOutAfterUnpin.split('\n')).to.includes('Removed ' + cid) - - const localRefsAfterGc = await ipfs('refs local') - expect(localRefsAfterGc.split('\n')).not.includes(cid) - }) })) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 
9ada465810..239c8816a7 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -193,7 +193,7 @@ expect(err.code).eql('ERR_DB_DELETE_FAILED') } - // Confirm second second block has been removed + // Confirm second block has been removed const localRefs = (await ipfs.refs.local()).map(r => r.ref) expect(localRefs).not.includes(cid2.toString()) From 97f80549aebec8f8f148dae449472e4b73732133 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 22 Jul 2019 12:32:47 -0400 Subject: [PATCH 53/56] fix: remove hacky code --- src/core/components/pin/pin-manager.js | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js index 848e5c1551..be26d7252d 100644 --- a/src/core/components/pin/pin-manager.js +++ b/src/core/components/pin/pin-manager.js @@ -158,9 +158,6 @@ class PinManager { }) }, - // hack for CLI tests - cb => this.repo.closed ? this.repo.open(cb) : cb(null, null), - // save root to datastore under a consistent key cb => this.repo.datastore.put(PIN_DS_KEY, root.multihash, cb) ], (err, res) => { @@ -172,9 +169,7 @@ class PinManager { load (callback) { waterfall([ - // hack for CLI tests - (cb) => this.repo.closed ? this.repo.datastore.open(cb) : cb(null, null), - (_, cb) => this.repo.datastore.has(PIN_DS_KEY, cb), + (cb) => this.repo.datastore.has(PIN_DS_KEY, cb), (has, cb) => has ? cb() : cb(new Error('No pins to load')), (cb) => this.repo.datastore.get(PIN_DS_KEY, cb), (mh, cb) => { From 36f45f455f1000e463d2dfc43fbb537edebb149d Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 23 Jul 2019 13:33:46 -0400 Subject: [PATCH 54/56] test: sharness test for GC --- test/sharness/t0030-gc.sh | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100755 test/sharness/t0030-gc.sh diff --git a/test/sharness/t0030-gc.sh b/test/sharness/t0030-gc.sh new file mode 100755 index 0000000000..599f370581 --- /dev/null +++ b/test/sharness/t0030-gc.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# +# Copyright (c) 2014 Christian Couder +# MIT Licensed; see the LICENSE file in this repository. +# +test_description="Stress test Garbage Collection" +. 
lib/test-lib.sh + +test_expect_success "Can add and then GC filesets" ' + export FILE_COUNT=3 + export FILE_SIZE_MB=1 + for (( i=1; i <= FILE_COUNT; i++ )); do dd if=/dev/urandom bs=$((FILE_SIZE_MB * 1048576)) count=1 of=file$i; done + export IPFS_PATH="$(pwd)/.ipfs" && + echo "IPFS_PATH: \"$IPFS_PATH\"" && + BITS="2048" && + ipfs init --bits="$BITS" >actual_init || + test_fsh cat actual_init + for (( i=1; i <= FILE_COUNT; i++ )); do ipfs add --pin=false --quiet file$i > hash$i; done + time ipfs repo gc + ipfs refs local > local-refs + for (( i=1; i <= FILE_COUNT; i++ )); do test_expect_code 1 grep `cat hash$i` local-refs; done +' + +test_done From d085a30770be8d7212ba3cd33d1b649088a23520 Mon Sep 17 00:00:00 2001 From: Alan Shaw Date: Wed, 21 Aug 2019 13:30:06 +0100 Subject: [PATCH 55/56] fix: review feedback License: MIT Signed-off-by: Alan Shaw --- package.json | 4 ++-- src/core/components/files-regular/add-pull-stream.js | 2 +- src/core/components/pin/gc.js | 9 ++++++--- src/core/components/pin/pin-manager.js | 11 ++++------- src/utils/mutex.js | 10 +++------- 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/package.json b/package.json index 850b95ff2e..16e9307b7d 100644 --- a/package.json +++ b/package.json @@ -96,7 +96,7 @@ "ipfs-bitswap": "~0.25.1", "ipfs-block": "~0.8.1", "ipfs-block-service": "~0.15.2", - "ipfs-http-client": "^33.0.1", + "ipfs-http-client": "^33.1.1", "ipfs-http-response": "~0.3.1", "ipfs-mfs": "~0.12.0", "ipfs-multipart": "~0.1.1", @@ -148,7 +148,6 @@ "multihashes": "~0.4.14", "multihashing-async": "~0.6.0", "node-fetch": "^2.3.0", - "p-event": "^4.1.0", "peer-book": "~0.9.0", "peer-id": "~0.12.3", "peer-info": "~0.15.0", @@ -196,6 +195,7 @@ "ipfsd-ctl": "^0.43.0", "libp2p-websocket-star": "~0.10.2", "ncp": "^2.0.0", + "p-event": "^4.1.0", "qs": "^6.5.2", "rimraf": "^3.0.0", "sinon": "^7.4.0", diff --git a/src/core/components/files-regular/add-pull-stream.js b/src/core/components/files-regular/add-pull-stream.js index a81f3a8fe8..ee815f1487 100644 --- a/src/core/components/files-regular/add-pull-stream.js +++ b/src/core/components/files-regular/add-pull-stream.js @@ -116,7 +116,7 @@ function pinFile (file, self, opts, cb) { const isRootDir = !file.path.includes('/') const shouldPin = pin && isRootDir && !opts.onlyHash && !opts.hashAlg if (shouldPin) { - // Note: we take a GC lock wherever we call addPullStream(), so tell + // Note: addPullStream() has already taken a GC lock, so tell // pin.add() not to take a (second) GC lock return self.pin.add(file.hash, { preload: false, lock: false }, err => cb(err, file)) } else { diff --git a/src/core/components/pin/gc.js b/src/core/components/pin/gc.js index f8507722e7..544b151bf3 100644 --- a/src/core/components/pin/gc.js +++ b/src/core/components/pin/gc.js @@ -5,14 +5,15 @@ const CID = require('cids') const base32 = require('base32.js') const parallel = require('async/parallel') const mapLimit = require('async/mapLimit') -const { Key } = require('interface-datastore') const expErr = require('explain-error') const { cidToString } = require('../../../utils/cid') const log = require('debug')('ipfs:gc') +// TODO: Use exported key from root when upgraded to ipfs-mfs@>=13 +// https://github.com/ipfs/js-ipfs-mfs/pull/58 +const { MFS_ROOT_KEY } = require('ipfs-mfs/src/core/utils/constants') // Limit on the number of parallel block remove operations const BLOCK_RM_CONCURRENCY = 256 -const MFS_ROOT_DS_KEY = new Key('/local/filesroot') // Perform mark and sweep garbage collection module.exports = function gc (self) { 
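For context on the MFS_ROOT_KEY change in this diff: GC's mark phase must treat the MFS root, and everything reachable from it, as live even though it is not pinned. A sketch of how the key is used, mirroring the hunk that follows; the wrapper function and its name are illustrative:

const CID = require('cids')
const { MFS_ROOT_KEY } = require('ipfs-mfs/src/core/utils/constants')

// Fetch the MFS root CID so the GC mark phase can walk its DAG.
// ERR_NOT_FOUND just means nothing has been written to MFS yet.
function getMfsRoot (repo, callback) {
  repo.root.get(MFS_ROOT_KEY, (err, mh) => {
    if (err) {
      if (err.code === 'ERR_NOT_FOUND') {
        return callback(null, null) // no MFS root, nothing extra to mark
      }
      return callback(err)
    }
    callback(null, new CID(mh))
  })
}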
@@ -72,7 +73,7 @@ function createMarkedSet (ipfs, callback) { }), // The MFS root and all its descendants - (cb) => ipfs._repo.root.get(MFS_ROOT_DS_KEY, (err, mh) => { + (cb) => ipfs._repo.root.get(MFS_ROOT_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { log(`No blocks in MFS`) @@ -147,6 +148,8 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blockKeys, callback) { }) } +// TODO: Use exported utility when upgraded to ipfs-repo@>=0.27.1 +// https://github.com/ipfs/js-ipfs-repo/pull/206 function dsKeyToCid (key) { // Block key is of the form /<base32 encoded multihash> const decoder = new base32.Decoder() diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js index be26d7252d..9bfd702b08 100644 --- a/src/core/components/pin/pin-manager.js +++ b/src/core/components/pin/pin-manager.js @@ -13,6 +13,7 @@ const { Key } = require('interface-datastore') const errCode = require('err-code') const multicodec = require('multicodec') const debug = require('debug') +const { cidToString } = require('../../../utils/cid') const createPinSet = require('./pin-set') @@ -20,10 +21,6 @@ const createPinSet = require('./pin-set') const concurrencyLimit = 300 const PIN_DS_KEY = new Key('/local/pins') -function toB58String (hash) { - return new CID(hash).toBaseEncodedString() -} - function invalidPinTypeErr (type) { const errMsg = `Invalid type '${type}', must be one of {direct, indirect, recursive, all}` return errCode(new Error(errMsg), 'ERR_INVALID_PIN_TYPE') @@ -192,8 +189,8 @@ class PinManager { if (err) { return callback(err) } const [rKeys, dKeys] = keys - this.directPins = new Set(dKeys.map(toB58String)) - this.recursivePins = new Set(rKeys.map(toB58String)) + this.directPins = new Set(dKeys.map(k => cidToString(k))) + this.recursivePins = new Set(rKeys.map(k => cidToString(k))) this.log('Loaded pins from the datastore') return callback(null) @@ -202,7 +199,7 @@ class PinManager { } isPinnedWithType (multihash, type, callback) { - const key = toB58String(multihash) + const key = cidToString(multihash) const { recursive, direct, all } = PinTypes // recursive diff --git a/src/utils/mutex.js b/src/utils/mutex.js index 0b4692cbe1..b91f3139a4 100644 --- a/src/utils/mutex.js +++ b/src/utils/mutex.js @@ -2,7 +2,6 @@ const assert = require('assert') const mortice = require('mortice') -const setImmediate = require('async/setImmediate') const noop = () => {} // Wrap mortice to present a callback interface @@ -33,8 +32,8 @@ class Mutex { * @returns {void} */ _lock (type, lockedFn, cb = noop) { - assert(typeof lockedFn === 'function', `first argument to CBLock.${type}() must be a function`) - assert(typeof cb === 'function', `second argument to CBLock.${type}() must be a callback function`) + assert(typeof lockedFn === 'function', `first argument to Mutex.${type}() must be a function`) + assert(typeof cb === 'function', `second argument to Mutex.${type}() must be a callback function`) const lockId = this.lockId++ this.log(`[${lockId}] ${type} requested`) @@ -55,10 +54,7 @@ class Mutex { const lock = this.mutex[type](locked) // When the locked piece of code is complete, the Promise resolves - return lock.then( - (res) => setImmediate(() => cb(null, res)), - (err) => setImmediate(() => cb(err)) - ) + return lock.then(res => cb(null, res), cb) } } module.exports = Mutex From f869455e6d8d1898bf6516fea97f401be7d38215 Mon Sep 17 00:00:00 2001 From: Alex Potsides Date: Fri, 23 Aug 2019 10:24:27 +0100 Subject: [PATCH 56/56] fix: Do not load all of a DAG into memory when pinning (#2387) Port of #2372 into gc branch to
ease merging --- src/core/components/pin.js | 2 +- src/core/components/pin/pin-manager.js | 72 ++++++++++++++++++-------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index cfd9035a57..025fdb75ed 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -247,7 +247,7 @@ module.exports = (self) => { } if (type === PinTypes.indirect || type === PinTypes.all) { - pinManager.getIndirectKeys((err, indirects) => { + pinManager.getIndirectKeys(options, (err, indirects) => { if (err) { return callback(err) } pins = pins // if something is pinned both directly and indirectly, diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js index 9bfd702b08..096980fcec 100644 --- a/src/core/components/pin/pin-manager.js +++ b/src/core/components/pin/pin-manager.js @@ -1,14 +1,14 @@ /* eslint max-nested-callbacks: ["error", 8] */ 'use strict' -const { DAGNode, DAGLink, util } = require('ipld-dag-pb') +const { DAGNode, DAGLink } = require('ipld-dag-pb') const CID = require('cids') -const map = require('async/map') const series = require('async/series') const parallel = require('async/parallel') const eachLimit = require('async/eachLimit') const waterfall = require('async/waterfall') const detectLimit = require('async/detectLimit') +const queue = require('async/queue') const { Key } = require('interface-datastore') const errCode = require('err-code') const multicodec = require('multicodec') @@ -43,6 +43,34 @@ class PinManager { this.recursivePins = new Set() } + _walkDag ({ cid, preload = false, onCid = () => {} }, cb) { + const q = queue(({ cid }, done) => { + this.dag.get(cid, { preload }, (err, result) => { + if (err) { + return done(err) + } + + onCid(cid) + + if (result.value.Links) { + q.push(result.value.Links.map(link => ({ + cid: link.Hash + }))) + } + + done() + }) + }, concurrencyLimit) + q.drain = () => { + cb() + } + q.error = (err) => { + q.kill() + cb(err) + } + q.push({ cid }) + } + directKeys () { return Array.from(this.directPins, key => new CID(key).buffer) } @@ -51,30 +79,21 @@ class PinManager { return Array.from(this.recursivePins, key => new CID(key).buffer) } - getIndirectKeys (callback) { + getIndirectKeys ({ preload }, callback) { const indirectKeys = new Set() eachLimit(this.recursiveKeys(), concurrencyLimit, (multihash, cb) => { - this.dag._getRecursive(multihash, (err, nodes) => { - if (err) { - return cb(err) - } - - map(nodes, (node, cb) => util.cid(util.serialize(node), { - cidVersion: 0 - }).then(cid => cb(null, cid), cb), (err, cids) => { - if (err) { - return cb(err) + this._walkDag({ + cid: new CID(multihash), + preload: preload || false, + onCid: (cid) => { + cid = cid.toString() + + // recursive pins pre-empt indirect pins + if (!this.recursivePins.has(cid)) { + indirectKeys.add(cid) } - - cids - .map(cid => cid.toString()) - // recursive pins pre-empt indirect pins - .filter(key => !this.recursivePins.has(key)) - .forEach(key => indirectKeys.add(key)) - - cb() - }) - }) + } + }, cb) }, (err) => { if (err) { return callback(err) } callback(null, Array.from(indirectKeys)) @@ -283,6 +302,13 @@ class PinManager { }) } + fetchCompleteDag (cid, options, callback) { + this._walkDag({ + cid, + preload: options.preload + }, callback) + } + // Returns an error if the pin type is invalid static checkPinType (type) { if (typeof type !== 'string' || !Object.keys(PinTypes).includes(type)) {
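A usage sketch for the _walkDag refactor above, which replaces dag._getRecursive (that buffered every node of a pinned DAG) with a bounded async queue: collect every CID reachable from a recursively pinned root without holding the whole DAG in memory. Here rootCid and pinManager are placeholders for a real CID and a constructed PinManager instance:

const seen = new Set()

pinManager._walkDag({
  cid: rootCid, // eg a CID returned by ipfs.add
  preload: false,
  onCid: (cid) => seen.add(cid.toString())
}, (err) => {
  if (err) throw err
  console.log(`visited ${seen.size} blocks`)
})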