MCPcopy
hub / github.com/ThatGuySam/doesitarm / fetchAllUrlsFromSitemaps

Function fetchAllUrlsFromSitemaps

helpers/api/sitemap/parse.js:100–141  ·  view source on GitHub ↗
( urlString )

Source from the content-addressed store, hash-verified

98}
99
/**
 * Collect every URL listed in the sitemaps reachable from a base URL.
 *
 * Each entry of `sitemapFilesToTry` is resolved against `urlString` and
 * probed with `headOk`; candidates that fail the probe are skipped. The
 * ones that pass are handed to `getAllUrlsFromSitemap`, and every returned
 * tag is stored in the result keyed by its `loc`.
 *
 * Candidates are processed one after another, so when the same `loc` shows
 * up more than once the most recently processed entry wins.
 *
 * @param {string} urlString - Base URL that sitemap paths are resolved against.
 * @returns {Promise<Map<string, URL>>} Map of `loc` to its parsed `URL`
 *   (assumes `loc` is an absolute URL string; verify against the parser).
 * @throws {TypeError} If a sitemap path or a `loc` cannot be parsed by `new URL`.
 */
export async function fetchAllUrlsFromSitemaps ( urlString ) {
    const collectedUrls = new Map()

    // Loader given to the sitemap parser: resolves whatever path it asks for
    // against the base URL and downloads the XML text.
    const loadSitemapXml = async sitemapPath => {
        const { href } = new URL( sitemapPath, urlString )

        return await getText( href )
    }

    for ( const candidateFile of sitemapFilesToTry ) {
        const { href: candidateHref } = new URL( candidateFile, urlString )

        // Quick existence probe first, so a missing sitemap is skipped
        // without downloading its whole body.
        const isAvailable = await headOk( candidateHref )

        if ( !isAvailable ) continue

        // Fetch and parse the whole sitemap
        const tags = await getAllUrlsFromSitemap( candidateHref, {
            getMethod: loadSitemapXml
        })

        // Record each location alongside its parsed URL
        tags.forEach( tag => {
            collectedUrls.set( tag.loc, new URL( tag.loc ) )
        })
    }

    return collectedUrls
}

Callers 1

main.test.tsFile · 0.90

Calls 4

headOkFunction · 0.90
getTextFunction · 0.90
getAllUrlsFromSitemapFunction · 0.85
setMethod · 0.80

Tested by

no test coverage detected