Set ArchiveBox to regularly import URLs at specific times using cron
(add: bool=False,
show: bool=False,
clear: bool=False,
foreground: bool=False,
run_all: bool=False,
quiet: bool=False,
every: Optional[str]=None,
tag: str='',
depth: int=0,
overwrite: bool=False,
update: bool=not ONLY_NEW,
import_path: Optional[str]=None,
out_dir: Path=OUTPUT_DIR)
| 1162 | |
| 1163 | @enforce_types |
| 1164 | def schedule(add: bool=False, |
| 1165 | show: bool=False, |
| 1166 | clear: bool=False, |
| 1167 | foreground: bool=False, |
| 1168 | run_all: bool=False, |
| 1169 | quiet: bool=False, |
| 1170 | every: Optional[str]=None, |
| 1171 | tag: str='', |
| 1172 | depth: int=0, |
| 1173 | overwrite: bool=False, |
| 1174 | update: bool=not ONLY_NEW, |
| 1175 | import_path: Optional[str]=None, |
| 1176 | out_dir: Path=OUTPUT_DIR): |
| 1177 | """Set ArchiveBox to regularly import URLs at specific times using cron""" |
| 1178 | |
| 1179 | check_data_folder(out_dir=out_dir) |
| 1180 | |
| 1181 | Path(LOGS_DIR).mkdir(exist_ok=True) |
| 1182 | |
| 1183 | cron = CronTab(user=True) |
| 1184 | cron = dedupe_cron_jobs(cron) |
| 1185 | |
| 1186 | if clear: |
| 1187 | print(cron.remove_all(comment=CRON_COMMENT)) |
| 1188 | cron.write() |
| 1189 | raise SystemExit(0) |
| 1190 | |
| 1191 | existing_jobs = list(cron.find_comment(CRON_COMMENT)) |
| 1192 | |
| 1193 | if every or add: |
| 1194 | every = every or 'day' |
| 1195 | quoted = lambda s: f'"{s}"' if (s and ' ' in str(s)) else str(s) |
| 1196 | cmd = [ |
| 1197 | 'cd', |
| 1198 | quoted(out_dir), |
| 1199 | '&&', |
| 1200 | quoted(ARCHIVEBOX_BINARY), |
| 1201 | *([ |
| 1202 | 'add', |
| 1203 | *(['--overwrite'] if overwrite else []), |
| 1204 | *(['--update'] if update else []), |
| 1205 | *([f'--tag={tag}'] if tag else []), |
| 1206 | f'--depth={depth}', |
| 1207 | f'"{import_path}"', |
| 1208 | ] if import_path else ['update']), |
| 1209 | '>>', |
| 1210 | quoted(Path(LOGS_DIR) / 'schedule.log'), |
| 1211 | '2>&1', |
| 1212 | |
| 1213 | ] |
| 1214 | new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT) |
| 1215 | |
| 1216 | if every in ('minute', 'hour', 'day', 'month', 'year'): |
| 1217 | set_every = getattr(new_job.every(), every) |
| 1218 | set_every() |
| 1219 | elif CronSlices.is_valid(every): |
| 1220 | new_job.setall(every) |
| 1221 | else: |
no test coverage detected