Skip to content

Commit b306853

Browse files
author
Alan Shaw
committed
fix: walkPath HAMT with non-default fanout
1 parent f42a40f commit b306853

File tree

2 files changed

+59
-11
lines changed

2 files changed

+59
-11
lines changed

packages/ipfs-unixfs-exporter/src/utils/find-cid-in-shard.ts

+34-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import { decode, type PBLink, type PBNode } from '@ipld/dag-pb'
22
import { murmur3128 } from '@multiformats/murmur3'
33
import { Bucket, type BucketPosition, createHAMT } from 'hamt-sharding'
4+
import errCode from 'err-code'
5+
import { UnixFS } from 'ipfs-unixfs'
46
import type { ExporterOptions, ShardTraversalContext, ReadableStorage } from '../index.js'
57
import type { CID } from 'multiformats/cid'
68

@@ -16,13 +18,14 @@ const hashFn = async function (buf: Uint8Array): Promise<Uint8Array> {
1618
}
1719

1820
const addLinksToHamtBucket = async (links: PBLink[], bucket: Bucket<boolean>, rootBucket: Bucket<boolean>): Promise<void> => {
21+
const padLength = (bucket.tableSize() - 1).toString(16).length
1922
await Promise.all(
2023
links.map(async link => {
2124
if (link.Name == null) {
2225
// TODO(@rvagg): decide how to handle this case - a Link without a Name is technically possible
2326
throw new Error('Unexpected Link without a Name')
2427
}
25-
if (link.Name.length === 2) {
28+
if (link.Name.length === padLength) {
2629
const pos = parseInt(link.Name, 16)
2730

2831
bucket._putObjectAt(pos, new Bucket({
@@ -37,12 +40,12 @@ const addLinksToHamtBucket = async (links: PBLink[], bucket: Bucket<boolean>, ro
3740
)
3841
}
3942

40-
const toPrefix = (position: number): string => {
43+
const toPrefix = (position: number, padLength: number): string => {
4144
return position
4245
.toString(16)
4346
.toUpperCase()
44-
.padStart(2, '0')
45-
.substring(0, 2)
47+
.padStart(padLength, '0')
48+
.substring(0, padLength)
4649
}
4750

4851
const toBucketPath = (position: BucketPosition<boolean>): Array<Bucket<boolean>> => {
@@ -62,8 +65,27 @@ const toBucketPath = (position: BucketPosition<boolean>): Array<Bucket<boolean>>
6265

6366
const findShardCid = async (node: PBNode, name: string, blockstore: ReadableStorage, context?: ShardTraversalContext, options?: ExporterOptions): Promise<CID | undefined> => {
6467
if (context == null) {
68+
if (node.Data == null) {
69+
throw errCode(new Error('no data in PBNode'), 'ERR_NOT_UNIXFS')
70+
}
71+
72+
let dir: UnixFS
73+
try {
74+
dir = UnixFS.unmarshal(node.Data)
75+
} catch (err: any) {
76+
throw errCode(err, 'ERR_NOT_UNIXFS')
77+
}
78+
79+
if (dir.type !== 'hamt-sharded-directory') {
80+
throw errCode(new Error('not a HAMT'), 'ERR_NOT_UNIXFS')
81+
}
82+
if (dir.fanout == null) {
83+
throw errCode(new Error('missing fanout'), 'ERR_NOT_UNIXFS')
84+
}
85+
6586
const rootBucket = createHAMT<boolean>({
66-
hashFn
87+
hashFn,
88+
bits: Math.log2(Number(dir.fanout))
6789
})
6890

6991
context = {
@@ -73,25 +95,27 @@ const findShardCid = async (node: PBNode, name: string, blockstore: ReadableStor
7395
}
7496
}
7597

98+
const padLength = (context.lastBucket.tableSize() - 1).toString(16).length
99+
76100
await addLinksToHamtBucket(node.Links, context.lastBucket, context.rootBucket)
77101

78102
const position = await context.rootBucket._findNewBucketAndPos(name)
79-
let prefix = toPrefix(position.pos)
103+
let prefix = toPrefix(position.pos, padLength)
80104
const bucketPath = toBucketPath(position)
81105

82106
if (bucketPath.length > context.hamtDepth) {
83107
context.lastBucket = bucketPath[context.hamtDepth]
84108

85-
prefix = toPrefix(context.lastBucket._posAtParent)
109+
prefix = toPrefix(context.lastBucket._posAtParent, padLength)
86110
}
87111

88112
const link = node.Links.find(link => {
89113
if (link.Name == null) {
90114
return false
91115
}
92116

93-
const entryPrefix = link.Name.substring(0, 2)
94-
const entryName = link.Name.substring(2)
117+
const entryPrefix = link.Name.substring(0, padLength)
118+
const entryName = link.Name.substring(padLength)
95119

96120
if (entryPrefix !== prefix) {
97121
// not the entry or subshard we're looking for
@@ -110,7 +134,7 @@ const findShardCid = async (node: PBNode, name: string, blockstore: ReadableStor
110134
return
111135
}
112136

113-
if (link.Name != null && link.Name.substring(2) === name) {
137+
if (link.Name != null && link.Name.substring(padLength) === name) {
114138
return link.Hash
115139
}
116140

packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts

+25-1
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ describe('exporter sharded', function () {
241241
await block.put(nodeBlockCid, nodeBlockBuf)
242242

243243
const shardNodeBuf = dagPb.encode({
244-
Data: new UnixFS({ type: 'hamt-sharded-directory' }).marshal(),
244+
Data: new UnixFS({ type: 'hamt-sharded-directory', fanout: 2n**8n }).marshal(),
245245
Links: [{
246246
Name: '75normal-dir',
247247
Tsize: nodeBlockBuf.length,
@@ -291,4 +291,28 @@ describe('exporter sharded', function () {
291291
})))
292292
.to.deep.equal(files)
293293
})
294+
295+
it('walks path of a HAMT with a different fanout size', async () => {
296+
const files: ImportCandidate[] = [{
297+
path: '/foo/bar/baz.txt',
298+
content: Uint8Array.from([0, 1, 2, 3, 4])
299+
}]
300+
301+
const result = await last(importer(files, block, {
302+
shardSplitThresholdBytes: 0,
303+
shardFanoutBits: 4, // 2**4 = 16 children max
304+
wrapWithDirectory: true
305+
}))
306+
307+
if (result == null) {
308+
throw new Error('Import failed')
309+
}
310+
311+
const { cid } = result
312+
const file = await last(walkPath(`${cid}/foo/bar/baz.txt`, block))
313+
expect([{
314+
path: file?.path.replace(`${cid}`, ''),
315+
content: file?.node
316+
}]).to.deep.equal(files)
317+
})
294318
})

0 commit comments

Comments
 (0)