Skip to content

Commit f42a40f

Browse files
achingbrain and rvagg authored
feat: add config option to control fanout size (#356)
* feat: add config option to control fanout size. Adds a `shardFanoutBits` option to the importer to allow configuring the number of hash bits used for the HAMT prefix at each level of sharding, along with a test. (The option was first drafted as `shardFanoutBytes`, counting bytes.) * fix: use fanout "bits" (#357) --------- Co-authored-by: Rod Vagg <rod@vagg.org>
1 parent d269d97 commit f42a40f

File tree

6 files changed

+62
-10
lines changed

6 files changed

+62
-10
lines changed

packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ async function * listDirectory (node: PBNode, path: string, resolve: Resolve, de
3333
throw errCode(err, 'ERR_NOT_UNIXFS')
3434
}
3535

36-
if (!dir.fanout) {
36+
if (dir.fanout == null) {
3737
throw errCode(new Error('missing fanout'), 'ERR_NOT_UNIXFS')
3838
}
3939

packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts

+37-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import * as dagPb from '@ipld/dag-pb'
44
import { expect } from 'aegir/chai'
55
import { MemoryBlockstore } from 'blockstore-core'
66
import { UnixFS } from 'ipfs-unixfs'
7-
import { importer } from 'ipfs-unixfs-importer'
7+
import { importer, type ImportCandidate } from 'ipfs-unixfs-importer'
88
import all from 'it-all'
99
import randomBytes from 'it-buffer-stream'
1010
import last from 'it-last'
@@ -255,4 +255,40 @@ describe('exporter sharded', function () {
255255

256256
expect(exported.name).to.deep.equal('file-1')
257257
})
258+
259+
it('exports a shard with a different fanout size', async () => {
260+
const files: ImportCandidate[] = [{
261+
path: '/baz.txt',
262+
content: Uint8Array.from([0, 1, 2, 3, 4])
263+
}, {
264+
path: '/foo.txt',
265+
content: Uint8Array.from([0, 1, 2, 3, 4])
266+
}, {
267+
path: '/bar.txt',
268+
content: Uint8Array.from([0, 1, 2, 3, 4])
269+
}]
270+
271+
const result = await last(importer(files, block, {
272+
shardSplitThresholdBytes: 0,
273+
shardFanoutBits: 4, // 2**4 = 16 children max
274+
wrapWithDirectory: true
275+
}))
276+
277+
if (result == null) {
278+
throw new Error('Import failed')
279+
}
280+
281+
const { cid } = result
282+
const dir = await exporter(cid, block)
283+
284+
expect(dir).to.have.nested.property('unixfs.fanout', 16n)
285+
286+
const contents = await all(dir.content())
287+
288+
expect(contents.map(entry => ({
289+
path: `/${entry.name}`,
290+
content: entry.node
291+
})))
292+
.to.deep.equal(files)
293+
})
258294
})

packages/ipfs-unixfs-importer/src/dir-sharded.ts

+11-4
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,21 @@ async function hamtHashFn (buf: Uint8Array): Promise<Uint8Array> {
1818
}
1919

2020
const HAMT_HASH_CODE = BigInt(0x22)
21+
const DEFAULT_FANOUT_BITS = 8
22+
23+
export interface DirShardedOptions extends PersistOptions {
24+
shardFanoutBits: number
25+
}
2126

2227
class DirSharded extends Dir {
2328
private readonly _bucket: Bucket<InProgressImportResult | Dir>
2429

25-
constructor (props: DirProps, options: PersistOptions) {
30+
constructor (props: DirProps, options: DirShardedOptions) {
2631
super(props, options)
2732

2833
this._bucket = createHAMT({
2934
hashFn: hamtHashFn,
30-
bits: 8
35+
bits: options.shardFanoutBits ?? DEFAULT_FANOUT_BITS
3136
})
3237
}
3338

@@ -88,6 +93,7 @@ export default DirSharded
8893

8994
async function * flush (bucket: Bucket<Dir | InProgressImportResult>, blockstore: Blockstore, shardRoot: DirSharded | null, options: PersistOptions): AsyncIterable<ImportResult> {
9095
const children = bucket._children
96+
const padLength = (bucket.tableSize() - 1).toString(16).length
9197
const links: PBLink[] = []
9298
let childrenSize = 0n
9399

@@ -98,7 +104,7 @@ async function * flush (bucket: Bucket<Dir | InProgressImportResult>, blockstore
98104
continue
99105
}
100106

101-
const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0')
107+
const labelPrefix = i.toString(16).toUpperCase().padStart(padLength, '0')
102108

103109
if (child instanceof Bucket) {
104110
let shard
@@ -191,6 +197,7 @@ function isDir (obj: any): obj is Dir {
191197

192198
function calculateSize (bucket: Bucket<any>, shardRoot: DirSharded | null, options: PersistOptions): number {
193199
const children = bucket._children
200+
const padLength = (bucket.tableSize() - 1).toString(16).length
194201
const links: PBLink[] = []
195202

196203
for (let i = 0; i < children.length; i++) {
@@ -200,7 +207,7 @@ function calculateSize (bucket: Bucket<any>, shardRoot: DirSharded | null, optio
200207
continue
201208
}
202209

203-
const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0')
210+
const labelPrefix = i.toString(16).toUpperCase().padStart(padLength, '0')
204211

205212
if (child instanceof Bucket) {
206213
const size = calculateSize(child, null, options)

packages/ipfs-unixfs-importer/src/flat-to-shard.ts

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
import { DirFlat } from './dir-flat.js'
2-
import DirSharded from './dir-sharded.js'
2+
import DirSharded, { type DirShardedOptions } from './dir-sharded.js'
33
import type { Dir } from './dir.js'
4-
import type { PersistOptions } from './utils/persist.js'
54

6-
export async function flatToShard (child: Dir | null, dir: Dir, threshold: number, options: PersistOptions): Promise<DirSharded> {
5+
export async function flatToShard (child: Dir | null, dir: Dir, threshold: number, options: DirShardedOptions): Promise<DirSharded> {
76
let newDir = dir as DirSharded
87

98
if (dir instanceof DirFlat && dir.estimateNodeSize() > threshold) {
@@ -31,7 +30,7 @@ export async function flatToShard (child: Dir | null, dir: Dir, threshold: numbe
3130
return newDir
3231
}
3332

34-
async function convertToShard (oldDir: DirFlat, options: PersistOptions): Promise<DirSharded> {
33+
async function convertToShard (oldDir: DirFlat, options: DirShardedOptions): Promise<DirSharded> {
3534
const newDir = new DirSharded({
3635
root: oldDir.root,
3736
dir: true,

packages/ipfs-unixfs-importer/src/index.ts

+9
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ export interface ImporterOptions extends ProgressOptions<ImporterProgressEvents>
123123
*/
124124
shardSplitThresholdBytes?: number
125125

126+
/**
127+
* The number of bits of a hash digest used at each level of sharding to
128+
* select the child index. 2**shardFanoutBits dictates the maximum number of
129+
* children for any shard in the HAMT. Default: 8
130+
*/
131+
shardFanoutBits?: number
132+
126133
/**
127134
* How many files to import concurrently. For large numbers of small files this
128135
* should be high (e.g. 50). Default: 10
@@ -241,6 +248,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri
241248

242249
const wrapWithDirectory = options.wrapWithDirectory ?? false
243250
const shardSplitThresholdBytes = options.shardSplitThresholdBytes ?? 262144
251+
const shardFanoutBits = options.shardFanoutBits ?? 8
244252
const cidVersion = options.cidVersion ?? 1
245253
const rawLeaves = options.rawLeaves ?? true
246254
const leafType = options.leafType ?? 'file'
@@ -269,6 +277,7 @@ export async function * importer (source: ImportCandidateStream, blockstore: Wri
269277
const buildTree: TreeBuilder = options.treeBuilder ?? defaultTreeBuilder({
270278
wrapWithDirectory,
271279
shardSplitThresholdBytes,
280+
shardFanoutBits,
272281
cidVersion,
273282
onProgress: options.onProgress
274283
})

packages/ipfs-unixfs-importer/src/tree-builder.ts

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import type { PersistOptions } from './utils/persist.js'
77

88
export interface AddToTreeOptions extends PersistOptions {
99
shardSplitThresholdBytes: number
10+
shardFanoutBits: number
1011
}
1112

1213
async function addToTree (elem: InProgressImportResult, tree: Dir, options: AddToTreeOptions): Promise<Dir> {

0 commit comments

Comments
 (0)