MCPcopy
hub / github.com/pingc0y/URLFinder / urlFind

Function urlFind

crawler/find.go:98–226  ·  view source on GitHub ↗

分析内容中的url

(cont, host, scheme, path, source string, num int)

Source from the content-addressed store, hash-verified

96
97// 分析内容中的url
98func urlFind(cont, host, scheme, path, source string, num int) {
99 var cata string
100 care := regexp.MustCompile("/.*/{1}|/")
101 catae := care.FindAllString(path, -1)
102 if len(catae) == 0 {
103 cata = "/"
104 } else {
105 cata = catae[0]
106 }
107 host = scheme + "://" + host
108
109 //url匹配正则
110
111 for _, re := range config.UrlFind {
112 reg := regexp.MustCompile(re)
113 urls := reg.FindAllStringSubmatch(cont, -1)
114 //fmt.Println(urls)
115 urls = urlFilter(urls)
116
117 //循环提取url放到结果中
118 for _, url := range urls {
119 if url[0] == "" {
120 continue
121 }
122 if strings.HasPrefix(url[0], "https:") || strings.HasPrefix(url[0], "http:") {
123 switch AppendUrl(url[0], source) {
124 case 0:
125 if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) {
126 config.Wg.Add(1)
127 config.Ch <- 1
128 go Spider(url[0], num+1)
129 }
130 case 1:
131 return
132 case 2:
133 continue
134 }
135 } else if strings.HasPrefix(url[0], "//") {
136 switch AppendUrl(scheme+":"+url[0], source) {
137 case 0:
138 if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) {
139 config.Wg.Add(1)
140 config.Ch <- 1
141 go Spider(scheme+":"+url[0], num+1)
142 }
143 case 1:
144 return
145 case 2:
146 continue
147 }
148
149 } else if strings.HasPrefix(url[0], "/") {
150 urlz := ""
151 if cmd.B != "" {
152 urlz = cmd.B + url[0]
153 } else {
154 urlz = host + url[0]
155 }

Callers 2

Spider · Function · 0.85

Calls 3

urlFilter · Function · 0.85
AppendUrl · Function · 0.85
Spider · Function · 0.85