MCPcopy
hub / github.com/hu17889/go_spider / downloadHtml

Method downloadHtml

core/downloader/downloader_http.go:229–254  ·  view source on GitHub ↗
(p *page.Page, req *request.Request)

Source from the content-addressed store, hash-verified

227}
228
229func (this *HttpDownloader) downloadHtml(p *page.Page, req *request.Request) *page.Page {
230 var err error
231 p, destbody := this.downloadFile(p, req)
232 if !p.IsSucc() {
233 return p
234 }
235 bodyReader := bytes.NewReader([]byte(destbody))
236
237 var doc *goquery.Document
238 if doc, err = goquery.NewDocumentFromReader(bodyReader); err != nil {
239 mlog.LogInst().LogError(err.Error())
240 p.SetStatus(true, err.Error())
241 return p
242 }
243
244 var body string
245 if body, err = doc.Html(); err != nil {
246 mlog.LogInst().LogError(err.Error())
247 p.SetStatus(true, err.Error())
248 return p
249 }
250
251 p.SetBodyStr(body).SetHtmlParser(doc).SetStatus(false, "")
252
253 return p
254}
255
256func (this *HttpDownloader) downloadJson(p *page.Page, req *request.Request) *page.Page {
257 var err error

Callers 1

DownloadMethod · 0.95

Calls 6

downloadFileMethod · 0.95
IsSuccMethod · 0.80
LogErrorMethod · 0.80
SetStatusMethod · 0.80
SetHtmlParserMethod · 0.80
SetBodyStrMethod · 0.80

Tested by

no test coverage detected