download full site using single-file
(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
| 37 | |
| 38 | @enforce_types |
| 39 | def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: |
| 40 | """download full site using single-file""" |
| 41 | |
| 42 | out_dir = out_dir or Path(link.link_dir) |
| 43 | output = "singlefile.html" |
| 44 | |
| 45 | browser_args = chrome_args(CHROME_TIMEOUT=0) |
| 46 | |
| 47 | # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli |
| 48 | browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:])) |
| 49 | options = [ |
| 50 | *SINGLEFILE_ARGS, |
| 51 | '--browser-executable-path={}'.format(CHROME_BINARY), |
| 52 | browser_args, |
| 53 | ] |
| 54 | |
| 55 | # Deduplicate options (single-file doesn't like when you use the same option two times) |
| 56 | # |
| 57 | # NOTE: Option names that come first clobber conflicting names that come later. |
| 58 | # The reasoning: SINGLEFILE_ARGS is the setting that affects the singlefile command with the most |
| 59 | # specificity, so the user sets it with a lot of intent, and therefore it should take precedence, |
| 60 | # kind of like the ergonomic principle of lexical scope in programming languages. |
| 61 | seen_option_names = [] |
| 62 | def test_seen(argument): |
| 63 | option_name = argument.split("=")[0] |
| 64 | if option_name in seen_option_names: |
| 65 | return False |
| 66 | else: |
| 67 | seen_option_names.append(option_name) |
| 68 | return True |
| 69 | deduped_options = list(filter(test_seen, options)) |
| 70 | |
| 71 | cmd = [ |
| 72 | DEPENDENCIES['SINGLEFILE_BINARY']['path'], |
| 73 | *deduped_options, |
| 74 | link.url, |
| 75 | output, |
| 76 | ] |
| 77 | |
| 78 | status = 'succeeded' |
| 79 | timer = TimedProgress(timeout, prefix=' ') |
| 80 | try: |
| 81 | result = run(cmd, cwd=str(out_dir), timeout=timeout) |
| 82 | |
| 83 | # keep up to the last three non-empty lines of combined stdout + stderr to show as hints, e.g.: |
| 84 | # "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)" |
| 85 | output_tail = [ |
| 86 | line.strip() |
| 87 | for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:] |
| 88 | if line.strip() |
| 89 | ] |
| 90 | hints = ( |
| 91 | 'Got single-file response code: {}.'.format(result.returncode), |
| 92 | *output_tail, |
| 93 | ) |
| 94 | |
| 95 | # Check for common failure cases |
| 96 | if (result.returncode > 0) or not (out_dir / output).is_file(): |
nothing calls this directly
no test coverage detected