7
7
8
8
import _ from 'lodash' ;
9
9
import logger from '@docusaurus/logger' ;
10
- import { matchRoutes } from 'react-router-config' ;
10
+ import { matchRoutes as reactRouterMatchRoutes } from 'react-router-config' ;
11
11
import { parseURLPath , serializeURLPath , type URLPath } from '@docusaurus/utils' ;
12
12
import { getAllFinalRoutes } from './utils' ;
13
13
import type { RouteConfig , ReportingSeverity } from '@docusaurus/types' ;
14
14
15
+ function matchRoutes ( routeConfig : RouteConfig [ ] , pathname : string ) {
16
+ // @ts-expect-error: React router types RouteConfig with an actual React
17
+ // component, but we load route components with string paths.
18
+ // We don't actually access component here, so it's fine.
19
+ return reactRouterMatchRoutes ( routeConfig , pathname ) ;
20
+ }
21
+
15
22
type BrokenLink = {
16
23
link : string ;
17
24
resolvedLink : string ;
@@ -26,88 +33,121 @@ type CollectedLinks = {
26
33
[ pathname : string ] : { links : string [ ] ; anchors : string [ ] } ;
27
34
} ;
28
35
29
- function getBrokenLinksForPage ( {
36
+ // We use efficient data structures for performance reasons
37
+ // See https://github.com/facebook/docusaurus/issues/9754
38
+ type CollectedLinksNormalized = Map <
39
+ string ,
40
+ { links : Set < string > ; anchors : Set < string > }
41
+ > ;
42
+
43
+ type BrokenLinksHelper = {
44
+ collectedLinks : CollectedLinksNormalized ;
45
+ isPathBrokenLink : ( linkPath : URLPath ) => boolean ;
46
+ isAnchorBrokenLink : ( linkPath : URLPath ) => boolean ;
47
+ } ;
48
+
49
+ function createBrokenLinksHelper ( {
30
50
collectedLinks,
31
- pagePath,
32
- pageLinks,
33
51
routes,
34
52
} : {
35
- collectedLinks : CollectedLinks ;
36
- pagePath : string ;
37
- pageLinks : string [ ] ;
38
- pageAnchors : string [ ] ;
53
+ collectedLinks : CollectedLinksNormalized ;
39
54
routes : RouteConfig [ ] ;
40
- } ) : BrokenLink [ ] {
41
- const allCollectedPaths = new Set ( Object . keys ( collectedLinks ) ) ;
55
+ } ) : BrokenLinksHelper {
56
+ const validPathnames = new Set ( collectedLinks . keys ( ) ) ;
57
+
58
+ // Matching against the route array can be expensive
59
+ // If the route is already in the valid pathnames,
60
+ // we can avoid matching against it as an optimization
61
+ const remainingRoutes = routes . filter (
62
+ ( route ) => ! validPathnames . has ( route . path ) ,
63
+ ) ;
64
+
65
+ function isPathnameMatchingAnyRoute ( pathname : string ) : boolean {
66
+ if ( matchRoutes ( remainingRoutes , pathname ) . length > 0 ) {
67
+ // IMPORTANT: this is an optimization here
68
+ // See https://github.com/facebook/docusaurus/issues/9754
69
+ // Large Docusaurus sites have many routes!
70
+ // We try to minimize calls to a possibly expensive matchRoutes function
71
+ validPathnames . add ( pathname ) ;
72
+ return true ;
73
+ }
74
+
75
+ return false ;
76
+ }
42
77
43
78
function isPathBrokenLink ( linkPath : URLPath ) {
44
79
const pathnames = [ linkPath . pathname , decodeURI ( linkPath . pathname ) ] ;
45
- const matchedRoutes = pathnames
46
- // @ts-expect-error: React router types RouteConfig with an actual React
47
- // component, but we load route components with string paths.
48
- // We don't actually access component here, so it's fine.
49
- . map ( ( l ) => matchRoutes ( routes , l ) )
50
- . flat ( ) ;
51
- // The link path is broken if:
52
- // - it doesn't match any route
53
- // - it doesn't match any collected path
54
- return (
55
- matchedRoutes . length === 0 &&
56
- ! pathnames . some ( ( p ) => allCollectedPaths . has ( p ) )
57
- ) ;
80
+ if ( pathnames . some ( ( p ) => validPathnames . has ( p ) ) ) {
81
+ return false ;
82
+ }
83
+ if ( pathnames . some ( isPathnameMatchingAnyRoute ) ) {
84
+ return false ;
85
+ }
86
+ return true ;
58
87
}
59
88
60
89
function isAnchorBrokenLink ( linkPath : URLPath ) {
61
90
const { pathname, hash} = linkPath ;
62
-
63
91
// Link has no hash: it can't be a broken anchor link
64
92
if ( hash === undefined ) {
65
93
return false ;
66
94
}
67
-
68
95
// Link has empty hash ("#", "/page#"...): we do not report it as broken
69
96
// Empty hashes are used for various weird reasons, by us and other users...
70
97
// See for example: https://github.com/facebook/docusaurus/pull/6003
71
98
if ( hash === '' ) {
72
99
return false ;
73
100
}
74
-
75
101
const targetPage =
76
- collectedLinks [ pathname ] || collectedLinks [ decodeURI ( pathname ) ] ;
77
-
102
+ collectedLinks . get ( pathname ) || collectedLinks . get ( decodeURI ( pathname ) ) ;
78
103
// link with anchor to a page that does not exist (or did not collect any
79
104
// link/anchor) is considered as a broken anchor
80
105
if ( ! targetPage ) {
81
106
return true ;
82
107
}
83
-
84
- // it's a broken anchor if the target page exists
85
- // but the anchor does not exist on that page
86
- const hashes = [ hash , decodeURIComponent ( hash ) ] ;
87
- return ! targetPage . anchors . some ( ( anchor ) => hashes . includes ( anchor ) ) ;
108
+ // it's not a broken anchor if the anchor exists on the target page
109
+ if (
110
+ targetPage . anchors . has ( hash ) ||
111
+ targetPage . anchors . has ( decodeURIComponent ( hash ) )
112
+ ) {
113
+ return false ;
114
+ }
115
+ return true ;
88
116
}
89
117
90
- const brokenLinks = pageLinks . flatMap ( ( link ) => {
118
+ return {
119
+ collectedLinks,
120
+ isPathBrokenLink,
121
+ isAnchorBrokenLink,
122
+ } ;
123
+ }
124
+
125
+ function getBrokenLinksForPage ( {
126
+ pagePath,
127
+ helper,
128
+ } : {
129
+ pagePath : string ;
130
+ helper : BrokenLinksHelper ;
131
+ } ) : BrokenLink [ ] {
132
+ const pageData = helper . collectedLinks . get ( pagePath ) ! ;
133
+
134
+ const brokenLinks : BrokenLink [ ] = [ ] ;
135
+
136
+ pageData . links . forEach ( ( link ) => {
91
137
const linkPath = parseURLPath ( link , pagePath ) ;
92
- if ( isPathBrokenLink ( linkPath ) ) {
93
- return [
94
- {
95
- link,
96
- resolvedLink : serializeURLPath ( linkPath ) ,
97
- anchor : false ,
98
- } ,
99
- ] ;
100
- }
101
- if ( isAnchorBrokenLink ( linkPath ) ) {
102
- return [
103
- {
104
- link,
105
- resolvedLink : serializeURLPath ( linkPath ) ,
106
- anchor : true ,
107
- } ,
108
- ] ;
138
+ if ( helper . isPathBrokenLink ( linkPath ) ) {
139
+ brokenLinks . push ( {
140
+ link,
141
+ resolvedLink : serializeURLPath ( linkPath ) ,
142
+ anchor : false ,
143
+ } ) ;
144
+ } else if ( helper . isAnchorBrokenLink ( linkPath ) ) {
145
+ brokenLinks . push ( {
146
+ link,
147
+ resolvedLink : serializeURLPath ( linkPath ) ,
148
+ anchor : true ,
149
+ } ) ;
109
150
}
110
- return [ ] ;
111
151
} ) ;
112
152
113
153
return brokenLinks ;
@@ -128,26 +168,30 @@ function getBrokenLinks({
128
168
collectedLinks,
129
169
routes,
130
170
} : {
131
- collectedLinks : CollectedLinks ;
171
+ collectedLinks : CollectedLinksNormalized ;
132
172
routes : RouteConfig [ ] ;
133
173
} ) : BrokenLinksMap {
134
174
const filteredRoutes = filterIntermediateRoutes ( routes ) ;
135
175
136
- return _ . mapValues ( collectedLinks , ( pageCollectedData , pagePath ) => {
176
+ const helper = createBrokenLinksHelper ( {
177
+ collectedLinks,
178
+ routes : filteredRoutes ,
179
+ } ) ;
180
+
181
+ const result : BrokenLinksMap = { } ;
182
+ collectedLinks . forEach ( ( _unused , pagePath ) => {
137
183
try {
138
- return getBrokenLinksForPage ( {
139
- collectedLinks,
140
- pageLinks : pageCollectedData . links ,
141
- pageAnchors : pageCollectedData . anchors ,
184
+ result [ pagePath ] = getBrokenLinksForPage ( {
142
185
pagePath,
143
- routes : filteredRoutes ,
186
+ helper ,
144
187
} ) ;
145
188
} catch ( e ) {
146
189
throw new Error ( `Unable to get broken links for page ${ pagePath } .` , {
147
190
cause : e ,
148
191
} ) ;
149
192
}
150
193
} ) ;
194
+ return result ;
151
195
}
152
196
153
197
function brokenLinkMessage ( brokenLink : BrokenLink ) : string {
@@ -303,15 +347,22 @@ function reportBrokenLinks({
303
347
// JS users might call "collectLink(undefined)" for example
304
348
// TS users might call "collectAnchor('#hash')" with/without #
305
349
// We clean/normalize the collected data to avoid obscure errors being thrown
350
+ // We also use optimized data structures for a faster algorithm
306
351
function normalizeCollectedLinks (
307
352
collectedLinks : CollectedLinks ,
308
- ) : CollectedLinks {
309
- return _ . mapValues ( collectedLinks , ( pageCollectedData ) => ( {
310
- links : pageCollectedData . links . filter ( _ . isString ) ,
311
- anchors : pageCollectedData . anchors
312
- . filter ( _ . isString )
313
- . map ( ( anchor ) => ( anchor . startsWith ( '#' ) ? anchor . slice ( 1 ) : anchor ) ) ,
314
- } ) ) ;
353
+ ) : CollectedLinksNormalized {
354
+ const result : CollectedLinksNormalized = new Map ( ) ;
355
+ Object . entries ( collectedLinks ) . forEach ( ( [ pathname , pageCollectedData ] ) => {
356
+ result . set ( pathname , {
357
+ links : new Set ( pageCollectedData . links . filter ( _ . isString ) ) ,
358
+ anchors : new Set (
359
+ pageCollectedData . anchors
360
+ . filter ( _ . isString )
361
+ . map ( ( anchor ) => ( anchor . startsWith ( '#' ) ? anchor . slice ( 1 ) : anchor ) ) ,
362
+ ) ,
363
+ } ) ;
364
+ } ) ;
365
+ return result ;
315
366
}
316
367
317
368
export async function handleBrokenLinks ( {
0 commit comments