| 12 | |
| 13 | |
class TestTaskSpider(feapder.TaskSpider):
    """Demo task spider: seeds one Baidu task, then prints page metadata."""

    def add_task(self):
        """Push the seed task into the task table.

        The framework calls this hook so tasks can be loaded into redis
        conveniently; it must not be written as an endless loop. In a real
        project a separate script can push tasks into redis instead.
        """
        # NOTE(review): the seed is handed to zadd as a dict -- confirm that
        # the redis wrapper's zadd accepts this form.
        push = self._redisdb.zadd
        push(self._task_table, {"id": 1, "url": "https://www.baidu.com"})

    def start_requests(self, task):
        """Yield one request for the given task.

        ``task`` is unpacked into exactly two values: the task id and the
        url to fetch. The id travels with the request as ``task_id``.
        """
        seed_id, target = task
        yield feapder.Request(target, task_id=seed_id)

    def parse(self, request, response):
        """Print the page title, its meta description and the final url."""
        # Page title
        page_title = response.xpath("//title/text()").extract_first()
        print(page_title)

        # Page description
        description = response.xpath(
            "//meta[@name='description']/@content"
        ).extract_first()
        print(description)

        print("网站地址: ", response.url)

        # With mysql the task state must be updated to done, i.e. state=1
        # yield self.update_task_batch(request.task_id)
| 32 | |
| 33 | |
| 34 | def start(args): |