| 16 | |
| 17 | |
| 18 | class GetData: |
| 19 | REMOTE_URL = "https://github.com/SunsetWolf/qlib_dataset/releases/download" |
| 20 | |
| 21 | def __init__(self, delete_zip_file=False): |
| 22 | """ |
| 23 | |
| 24 | Parameters |
| 25 | ---------- |
| 26 | delete_zip_file : bool, optional |
| 27 | Whether to delete the zip file, value from True or False, by default False |
| 28 | """ |
| 29 | self.delete_zip_file = delete_zip_file |
| 30 | |
def merge_remote_url(self, file_name: str):
    """
    Build the download link for a file.

    Parameters
    ----------
    file_name: str
        The name of the file to be downloaded.
        It may carry a version prefix (e.g.: v2/qlib_data_simple_cn_1d_latest.zip);
        a name without any "/" is looked up under v0.

    Returns
    -------
    str
        ``REMOTE_URL`` joined with the (possibly v0-prefixed) file name.
    """
    # A "/" anywhere in the name is taken to mean the caller already chose a version.
    if "/" not in file_name:
        file_name = f"v0/{file_name}"
    return f"{self.REMOTE_URL}/{file_name}"
| 43 | |
def download(self, url: str, target_path: [Path, str]):
    """
    Download a file from the specified url.

    Parameters
    ----------
    url: str
        The url of the data.
    target_path: Path or str
        The location where the data is saved, including the file name.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with an error status, or with any status
        other than 200.
    """
    # Callers may pass a plain string; normalise it so ``.name`` and ``.open`` work.
    target_path = Path(target_path)
    chunk_size = 1024

    # The context manager releases the streamed connection even when the
    # status check or the file write fails.
    with requests.get(url, stream=True, timeout=60) as resp:
        resp.raise_for_status()
        # raise_for_status() only rejects 4xx/5xx; any other non-200 answer
        # is treated as a failure as well.
        if resp.status_code != 200:
            raise requests.exceptions.HTTPError(
                f"Unexpected status code {resp.status_code} while downloading {url}",
                response=resp,
            )

        logger.warning(
            "The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)"
        )
        logger.info(f"{target_path.name} downloading......")

        # Without a Content-Length header the size is unknown; ``None`` lets
        # tqdm show a plain counter instead of a bar with a total of 0.
        total_bytes = int(resp.headers.get("Content-Length", 0)) or None
        with tqdm(total=total_bytes) as p_bar, target_path.open("wb") as fp:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                fp.write(chunk)
                # Advance by what was actually received; the final chunk is
                # usually shorter than chunk_size.
                p_bar.update(len(chunk))
| 71 | |
| 72 | def download_data(self, file_name: str, target_dir: [Path, str], delete_old: bool = True): |
| 73 | """ |
| 74 | Download the specified file to the target folder. |
| 75 |
no outgoing calls