分析内容中的url
(cont, host, scheme, path, source string, num int)
| 96 | |
| 97 | // 分析内容中的url |
| 98 | func urlFind(cont, host, scheme, path, source string, num int) { |
| 99 | var cata string |
| 100 | care := regexp.MustCompile("/.*/{1}|/") |
| 101 | catae := care.FindAllString(path, -1) |
| 102 | if len(catae) == 0 { |
| 103 | cata = "/" |
| 104 | } else { |
| 105 | cata = catae[0] |
| 106 | } |
| 107 | host = scheme + "://" + host |
| 108 | |
| 109 | //url匹配正则 |
| 110 | |
| 111 | for _, re := range config.UrlFind { |
| 112 | reg := regexp.MustCompile(re) |
| 113 | urls := reg.FindAllStringSubmatch(cont, -1) |
| 114 | //fmt.Println(urls) |
| 115 | urls = urlFilter(urls) |
| 116 | |
| 117 | //循环提取url放到结果中 |
| 118 | for _, url := range urls { |
| 119 | if url[0] == "" { |
| 120 | continue |
| 121 | } |
| 122 | if strings.HasPrefix(url[0], "https:") || strings.HasPrefix(url[0], "http:") { |
| 123 | switch AppendUrl(url[0], source) { |
| 124 | case 0: |
| 125 | if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) { |
| 126 | config.Wg.Add(1) |
| 127 | config.Ch <- 1 |
| 128 | go Spider(url[0], num+1) |
| 129 | } |
| 130 | case 1: |
| 131 | return |
| 132 | case 2: |
| 133 | continue |
| 134 | } |
| 135 | } else if strings.HasPrefix(url[0], "//") { |
| 136 | switch AppendUrl(scheme+":"+url[0], source) { |
| 137 | case 0: |
| 138 | if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) { |
| 139 | config.Wg.Add(1) |
| 140 | config.Ch <- 1 |
| 141 | go Spider(scheme+":"+url[0], num+1) |
| 142 | } |
| 143 | case 1: |
| 144 | return |
| 145 | case 2: |
| 146 | continue |
| 147 | } |
| 148 | |
| 149 | } else if strings.HasPrefix(url[0], "/") { |
| 150 | urlz := "" |
| 151 | if cmd.B != "" { |
| 152 | urlz = cmd.B + url[0] |
| 153 | } else { |
| 154 | urlz = host + url[0] |
| 155 | } |