This repository was archived by the owner on Aug 12, 2020. It is now read-only.

Commit 438f3b4

feat: support --raw-leaves
Goes some way towards fixing ipfs/js-ipfs#1432; follow-up PRs will be needed for js-ipfs-mfs and js-ipfs itself (🔜).

There are three ways of importing a file that we need to support, and each ends up with a slightly different DAG structure:

- `ipfs add` results in a balanced DAG with leaf nodes that are unixfs nodes of type `file`
- `ipfs files write` results in a trickle DAG with leaf nodes that are unixfs nodes of type `raw`
- `ipfs add --raw-leaves` and `ipfs files write --raw-leaves` have the balanced/trickle DAGs of the above, but the leaf nodes are chunks of file data not wrapped in protobufs

In all of the cases above the root node is a unixfs `file` node with a v0 CID, unless you specify `--cid-version=1`.

This PR:

- Changes the meaning of the existing raw-leaves argument (renamed from `rawLeafNodes` to `rawLeaves`). It now means the leaf node is just data, a chunk of the file; previously it meant a unixfs node with type `raw`. So far the only code using this is js-ipfs-mfs, so changing it shouldn't be too disruptive.
- Adds a `leafType` option which can be `file` or `raw`; when `--raw-leaves` is false, this is the unixfs type the leaf nodes will have.
- Uses CIDv1 with the codec `raw` for raw leaves.
1 parent 41b8ce5 commit 438f3b4
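For orientation, here is a minimal sketch of how a caller might opt in to the new behaviour, assuming this package's exported `importer` and an `ipld` resolver instance already in scope (the file name and content are illustrative):

```js
const pull = require('pull-stream')
const { importer } = require('ipfs-unixfs-engine')

// Import a small file with raw leaves: leaf nodes are stored as plain
// chunks of file data addressed by a CIDv1 with the 'raw' codec, while
// the root remains a unixfs 'file' node.
pull(
  pull.values([{
    path: 'hello.txt',
    content: pull.values([Buffer.from('hello world')])
  }]),
  importer(ipld, {     // `ipld` is an IPLD resolver instance, assumed in scope
    rawLeaves: true,   // store leaves as raw chunks, not UnixFS protobufs
    leafType: 'file'   // only consulted when rawLeaves is false
  }),
  pull.collect((err, files) => {
    if (err) throw err
    console.log(files.map((f) => f.path))
  })
)
```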

File tree: 8 files changed, +214 -33 lines changed


README.md (+1 -1)

```diff
@@ -149,7 +149,7 @@ The input's file paths and directory structure will be preserved in the [`dag-pb
 - `onlyHash` (boolean, defaults to false): Only chunk and hash - do not write to disk
 - `hashAlg` (string): multihash hashing algorithm to use
 - `cidVersion` (integer, default 0): the CID version to use when storing the data (storage keys are based on the CID, _including_ it's version)
-- `rawLeafNodes` (boolean, defaults to false): When a file would span multiple DAGNodes, if this is true the leaf nodes will be marked as `raw` `unixfs` nodes
+- `rawLeaves` (boolean, defaults to false): When a file would span multiple DAGNodes, if this is true the leaf nodes will not be wrapped in `UnixFS` protobufs and will instead contain the raw file bytes
 
 ### Exporter
 
```

src/builder/builder.js (+47 -17)

```diff
@@ -8,6 +8,7 @@ const parallel = require('async/parallel')
 const waterfall = require('async/waterfall')
 const dagPB = require('ipld-dag-pb')
 const CID = require('cids')
+const multihash = require('multihashing-async')
 
 const reduce = require('./reduce')
 
@@ -17,7 +18,9 @@ const defaultOptions = {
   chunkerOptions: {
     maxChunkSize: 262144
   },
-  rawLeafNodes: false
+  rawLeaves: false,
+  hashAlg: 'sha2-256',
+  leafType: 'file'
 }
 
 module.exports = function (createChunker, ipld, createReducer, _options) {
@@ -97,7 +100,6 @@ module.exports = function (createChunker, ipld, createReducer, _options) {
 
     let previous
     let count = 0
-    const leafType = options.rawLeafNodes ? 'raw' : 'file'
 
     pull(
      file.content,
@@ -108,30 +110,58 @@ module.exports = function (createChunker, ipld, createReducer, _options) {
         }
         return Buffer.from(chunk)
       }),
-      pull.map(buffer => new UnixFS(leafType, buffer)),
-      pull.asyncMap((fileNode, callback) => {
-        DAGNode.create(fileNode.marshal(), [], options.hashAlg, (err, node) => {
-          callback(err, { DAGNode: node, fileNode: fileNode })
+      pull.asyncMap((buffer, callback) => {
+        if (options.rawLeaves) {
+          return multihash(buffer, options.hashAlg, (error, hash) => {
+            if (error) {
+              return callback(error)
+            }
+
+            return callback(null, {
+              multihash: hash,
+              size: buffer.length,
+              leafSize: buffer.length,
+              cid: new CID(1, 'raw', hash),
+              data: buffer
+            })
+          })
+        }
+
+        const file = new UnixFS(options.leafType, buffer)
+
+        DAGNode.create(file.marshal(), [], options.hashAlg, (err, node) => {
+          let cid = new CID(0, 'dag-pb', node.multihash)
+
+          if (options.cidVersion === 1) {
+            cid = cid.toV1()
+          }
+
+          callback(err, {
+            multihash: node.multihash,
+            size: node.size,
+            leafSize: file.fileSize(),
+            cid,
+            data: node
+          })
         })
       }),
       pull.asyncMap((leaf, callback) => {
-        if (options.onlyHash) return callback(null, leaf)
-
-        let cid = new CID(leaf.DAGNode.multihash)
-
-        if (options.cidVersion === 1) {
-          cid = cid.toV1()
+        if (options.onlyHash) {
+          return callback(null, leaf)
         }
 
-        ipld.put(leaf.DAGNode, { cid }, (err) => callback(err, leaf))
+        ipld.put(leaf.data, {
+          cid: leaf.cid
+        }, (error) => callback(error, leaf))
       }),
       pull.map((leaf) => {
         return {
           path: file.path,
-          multihash: leaf.DAGNode.multihash,
-          size: leaf.DAGNode.size,
-          leafSize: leaf.fileNode.fileSize(),
-          name: ''
+          multihash: leaf.multihash,
+          size: leaf.size,
+          leafSize: leaf.leafSize,
+          name: '',
+          cid: leaf.cid
         }
       }),
       through( // mark as single node if only one single node
```
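The new `rawLeaves` branch above hashes each chunk directly and addresses it with a CIDv1 using the `raw` codec. As a standalone sketch of that derivation, using the same `multihashing-async` and `cids` modules the builder now imports (the chunk content is illustrative):

```js
const multihashing = require('multihashing-async')
const CID = require('cids')

// Hash the chunk bytes themselves, then wrap the multihash in a v1 CID
// with the 'raw' codec. No UnixFS/protobuf envelope is involved, so the
// stored block is exactly the chunk of file data.
multihashing(Buffer.from('a chunk of file data'), 'sha2-256', (err, hash) => {
  if (err) throw err

  const cid = new CID(1, 'raw', hash)
  console.log(cid.toBaseEncodedString()) // a CIDv1 string, e.g. 'zb2rh...'
})
```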

src/builder/reduce.js (+33 -10)

```diff
@@ -13,7 +13,7 @@ module.exports = function (file, ipld, options) {
     if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
       const leaf = leaves[0]
 
-      if (!options.rawLeafNodes) {
+      if (options.leafType === 'file' && !options.rawLeaves) {
         return callback(null, {
           path: file.path,
           multihash: leaf.multihash,
@@ -23,19 +23,22 @@ module.exports = function (file, ipld, options) {
         })
       }
 
-      // we are using raw leaf nodes, this file only has one node but it'll be marked raw
-      // so convert it back to a file node
+      // we're using raw leaf nodes so we convert the node into a UnixFS `file` node.
       return waterfall([
-        (cb) => ipld.get(new CID(leaf.multihash), cb),
+        (cb) => ipld.get(leaf.cid, cb),
         (result, cb) => {
-          const meta = UnixFS.unmarshal(result.value.data)
-          const fileNode = new UnixFS('file', meta.data)
+          const data = result.value.data
+          const fileNode = new UnixFS('file', data)
 
           DAGNode.create(fileNode.marshal(), [], options.hashAlg, (err, node) => {
             cb(err, { DAGNode: node, fileNode: fileNode })
           })
         },
         (result, cb) => {
+          if (options.onlyHash) {
+            return cb(null, result)
+          }
+
           let cid = new CID(result.DAGNode.multihash)
 
           if (options.cidVersion === 1) {
@@ -46,10 +49,11 @@ module.exports = function (file, ipld, options) {
         },
         (result, cb) => {
           cb(null, {
+            path: file.path,
             multihash: result.DAGNode.multihash,
             size: result.DAGNode.size,
             leafSize: result.fileNode.fileSize(),
-            name: ''
+            name: leaf.name
           })
         }
       ], callback)
@@ -61,13 +65,26 @@ module.exports = function (file, ipld, options) {
     const links = leaves.map((leaf) => {
       f.addBlockSize(leaf.leafSize)
 
-      return new DAGLink(leaf.name, leaf.size, leaf.multihash)
+      let cid = leaf.cid
+
+      if (!cid) {
+        // we are an intermediate node
+        cid = new CID(0, 'dag-pb', leaf.multihash)
+
+        if (options.cidVersion === 1) {
+          cid = cid.toV1()
+        }
+      }
+
+      return new DAGLink(leaf.name, leaf.size, cid.buffer)
     })
 
     waterfall([
       (cb) => DAGNode.create(f.marshal(), links, options.hashAlg, cb),
       (node, cb) => {
-        if (options.onlyHash) return cb(null, node)
+        if (options.onlyHash) {
+          return cb(null, node)
+        }
 
         let cid = new CID(node.multihash)
 
@@ -83,10 +100,16 @@ module.exports = function (file, ipld, options) {
           return // early
         }
 
+        let cid = new CID(0, 'dag-pb', node.multihash)
+
+        if (options.cidVersion === 1) {
+          cid = cid.toV1()
+        }
+
         const root = {
           name: '',
           path: file.path,
-          multihash: node.multihash,
+          multihash: cid.buffer,
           size: node.size,
           leafSize: f.fileSize()
         }
```
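Note the link-building fallback above: raw leaves arrive with a `cid` already attached, while intermediate dag-pb nodes only carry a multihash. A condensed sketch of that resolution, using a hypothetical `leafCid` helper name:

```js
const CID = require('cids')

// Resolve the CID to link against: prefer the CID the builder attached
// (raw leaves), otherwise derive one from the dag-pb multihash,
// honouring the requested CID version.
function leafCid (leaf, options) {
  if (leaf.cid) {
    return leaf.cid
  }

  let cid = new CID(0, 'dag-pb', leaf.multihash)

  if (options.cidVersion === 1) {
    cid = cid.toV1()
  }

  return cid
}
```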

src/exporter/file.js (+10)

```diff
@@ -63,6 +63,11 @@ function streamBytes (dag, node, fileSize, offset, length) {
 
   function getData ({ node, start }) {
     try {
+      if (Buffer.isBuffer(node)) {
+        // this is a raw node
+        return extractDataFromBlock(node, start, offset, end)
+      }
+
       const file = UnixFS.unmarshal(node.data)
 
       if (!file.data) {
@@ -80,6 +85,11 @@ function streamBytes (dag, node, fileSize, offset, length) {
   let streamPosition = 0
 
   function visitor ({ node }) {
+    if (Buffer.isBuffer(node)) {
+      // this is a raw node
+      return pull.empty()
+    }
+
     const file = UnixFS.unmarshal(node.data)
     const nodeHasData = Boolean(file.data && file.data.length)
 
```
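For a `Buffer` leaf there is no UnixFS envelope to unmarshal, so the bytes go straight to the existing `extractDataFromBlock` helper. That helper's body is not part of this diff; a plausible sketch of a function with that signature, which trims a block to the requested byte range, could look like this:

```js
// Hypothetical sketch of an extractDataFromBlock-style helper: given a
// block of bytes, the stream position where the block starts, and the
// requested [requestedStart, requestedEnd) range, return only the bytes
// of the block that fall inside the range.
function extractDataFromBlock (block, blockStart, requestedStart, requestedEnd) {
  const blockLength = block.length

  if (requestedEnd >= blockStart && requestedEnd < blockStart + blockLength) {
    // the end byte is in this block, drop everything after it
    block = block.slice(0, requestedEnd - blockStart)
  }

  if (requestedStart >= blockStart && requestedStart < blockStart + blockLength) {
    // the start byte is in this block, skip everything before it
    block = block.slice(requestedStart - blockStart)
  }

  return block
}
```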

src/importer/index.js (+19 -1)

```diff
@@ -15,11 +15,29 @@ const chunkers = {
 
 const defaultOptions = {
   chunker: 'fixed',
-  rawLeafNodes: false
+  rawLeaves: false,
+  hashOnly: false,
+  cidVersion: 0,
+  hash: null,
+  leafType: 'file',
+  hashAlg: 'sha2-256'
 }
 
 module.exports = function (ipld, _options) {
   const options = Object.assign({}, defaultOptions, _options)
+
+  if (options.cidVersion > 0 && _options.rawLeaves === undefined) {
+    // if the cid version is 1 or above, use raw leaves as this is
+    // what go does.
+    options.rawLeaves = true
+  }
+
+  if (_options && _options.hash !== undefined && _options.rawLeaves === undefined) {
+    // if a non-default hash alg has been specified, use raw leaves as this is
+    // what go does.
+    options.rawLeaves = true
+  }
+
   const Chunker = chunkers[options.chunker]
   assert(Chunker, 'Unknkown chunker named ' + options.chunker)
 
```
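The two guards above mean that asking for CIDv1 or a non-default hash implicitly turns raw leaves on, matching go-ipfs, unless the caller sets `rawLeaves` explicitly. An illustrative sketch of how the options resolve under that logic:

```js
// How the defaults above resolve for a few inputs (illustrative only).

importer(ipld, { cidVersion: 1 })
// -> options.rawLeaves === true (implied by cidVersion > 0)

importer(ipld, { hash: 'sha3-512' })
// -> options.rawLeaves === true (implied by a non-default hash)

importer(ipld, { cidVersion: 1, rawLeaves: false })
// -> options.rawLeaves === false (an explicit value always wins)
```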

test/exporter.js (+31)

```diff
@@ -422,6 +422,37 @@ module.exports = (repo) => {
       )
     })
 
+    it('exports a large file > 5mb imported with raw leaves', function (done) {
+      this.timeout(30 * 1000)
+
+      pull(
+        pull.values([{
+          path: '200Bytes.txt',
+          content: pull.values([bigFile])
+        }]),
+        importer(ipld, {
+          rawLeaves: true
+        }),
+        pull.collect(collected)
+      )
+
+      function collected (err, files) {
+        expect(err).to.not.exist()
+        expect(files.length).to.equal(1)
+
+        pull(
+          exporter(files[0].multihash, ipld),
+          pull.collect((err, files) => {
+            expect(err).to.not.exist()
+
+            expect(bs58.encode(files[0].hash)).to.equal('QmQLTvhjmSa7657mKdSfTjxFBdwxmK8n9tZC9Xdp9DtxWY')
+
+            fileEql(files[0], bigFile, done)
+          })
+        )
+      }
+    })
+
     it('returns an empty stream for dir', (done) => {
       const hash = 'QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn'
 
```

test/helpers/collect-leaf-cids.js (new file, +32)

```diff
@@ -0,0 +1,32 @@
+'use strict'
+
+const pull = require('pull-stream')
+const traverse = require('pull-traverse')
+const CID = require('cids')
+
+module.exports = (ipld, multihash, callback) => {
+  pull(
+    traverse.depthFirst(new CID(multihash), (cid) => {
+      return pull(
+        pull.values([cid]),
+        pull.asyncMap((cid, callback) => {
+          ipld.get(cid, (error, result) => {
+            callback(error, !error && result.value)
+          })
+        }),
+        pull.asyncMap((node, callback) => {
+          if (!node.links) {
+            return callback()
+          }
+
+          return callback(
+            null, node.links.map(link => new CID(link.multihash))
+          )
+        }),
+        pull.filter(Boolean),
+        pull.flatten()
+      )
+    }),
+    pull.collect(callback)
+  )
+}
```
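The new helper walks a DAG depth-first from a root multihash and collects the CID of every node it visits, which lets tests assert that leaves were stored with the expected codec. A possible usage sketch (the `file` import result and the assertion are illustrative):

```js
const collectLeafCids = require('./helpers/collect-leaf-cids')

// After importing with rawLeaves: true, leaf CIDs should use the
// 'raw' codec while intermediate nodes remain 'dag-pb'.
collectLeafCids(ipld, file.multihash, (error, cids) => {
  if (error) throw error

  const rawNodes = cids.filter((cid) => cid.codec === 'raw')
  console.log(`found ${rawNodes.length} raw leaf nodes`)
})
```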
