Prints out a list of URLs, each with our heuristic guess of whether it is a valid news URL, based purely on the URL itself
(self)
| 350 | |
@print_test
def test_valid_urls(self):
    """Check the URL-validity heuristic against the labelled URL fixture.

    Reads ``data/test_urls.txt`` under ``TEST_DIR``. Each non-blank line
    has the form ``<label> <url>`` separated by a single space, where
    label ``1`` marks a URL expected to be valid and any other integer
    marks one expected to be invalid. The expectation is compared with
    ``valid_url(url, test=True)``.

    Raises:
        AssertionError: if a line does not consist of exactly two
            space-separated fields, or if the heuristic disagrees with
            the label (the offending URL is printed before re-raising).
    """
    from newspaper.urls import valid_url

    fixture_path = os.path.join(TEST_DIR, 'data/test_urls.txt')
    with open(fixture_path, 'r') as f:
        # Skip blank lines (e.g. a trailing empty line) so they do not
        # surface later as an opaque ``int('')`` ValueError.
        test_tuples = [
            tuple(line.strip().split(' '))
            for line in f
            if line.strip()
        ]

    for tup in test_tuples:
        # Validate the shape BEFORE unpacking; previously the fields were
        # indexed first, so a short line raised IndexError and this check
        # could never report it.
        assert len(tup) == 2, 'malformed fixture line: %r' % (tup,)
        label, url = tup
        truth_val = int(label) == 1
        try:
            assert truth_val == valid_url(url, test=True)
        except AssertionError:
            # Name the failing URL, then let the test fail as usual.
            print('\t\turl: %s is supposed to be %s' % (url, truth_val))
            raise
| 374 | |
| 375 | @print_test |
| 376 | def test_prepare_url(self): |