Object containing the settings for a crawler. @author code4crafter@gmail.com @see us.codecraft.webmagic.processor.PageProcessor @since 0.1.0
| 17 | * @since 0.1.0 |
| 18 | */ |
| 19 | public class Site { |
| 20 | |
// Domain of the site; addCookie(String, String) documents its cookies as belonging to this domain.
private String domain;

// User-Agent value configured for the site.
// NOTE(review): presumably sent as the HTTP User-Agent header - confirm in the downloader.
private String userAgent;

// Cookies registered through addCookie(String, String); LinkedHashMap keeps insertion order.
private Map<String, String> defaultCookies = new LinkedHashMap<String, String>();

// Cookie name/value maps grouped under a string key.
// NOTE(review): the outer key is presumably a domain - confirm against the code that fills this map.
private Map<String, Map<String, String>> cookies = new HashMap<String, Map<String, String>>();

// Charset setting; null until assigned.
private String charset;

// Second charset setting; null until assigned.
// NOTE(review): precedence relative to charset is decided outside this view - confirm.
private String defaultCharset;

// Defaults to 5000. NOTE(review): presumably a delay in milliseconds - confirm with the consumer.
private int sleepTime = 5000;

// Defaults to 0.
private int retryTimes = 0;

// Defaults to 0.
private int cycleRetryTimes = 0;

// Defaults to 1000. NOTE(review): presumably milliseconds between retries - confirm.
private int retrySleepTime = 1000;

// Defaults to 5000. NOTE(review): presumably a timeout in milliseconds - confirm.
private int timeOut = 5000;

// Class-wide default set of accepted status codes; filled by the static initializer below.
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();

// Per-instance copy of the defaults. Copying instead of aliasing the static set means an
// in-place change to one site's accepted codes cannot leak into every other Site. Class
// initialization (including the static block below) completes before any instance field
// initializer runs, so the copy already contains the default code.
private Set<Integer> acceptStatCode = new HashSet<Integer>(DEFAULT_STATUS_CODE_SET);

// Header name/value pairs configured for the site; empty by default.
private Map<String, String> headers = new HashMap<String, String>();

// Enabled by default.
private boolean useGzip = true;

// Cookie management stays enabled unless this flag is set.
private boolean disableCookieManagement = false;

// Seeds the shared default set with CODE_200.
static {
    DEFAULT_STATUS_CODE_SET.add(HttpConstant.StatusCode.CODE_200);
}
| 56 | |
| 57 | /** |
| 58 | * new a Site |
| 59 | * |
| 60 | * @return new site |
| 61 | */ |
| 62 | public static Site me() { |
| 63 | return new Site(); |
| 64 | } |
| 65 | |
| 66 | /** |
| 67 | * Add a cookie with domain {@link #getDomain()} |
| 68 | * |
| 69 | * @param name name |
| 70 | * @param value value |
| 71 | * @return this |
| 72 | */ |
| 73 | public Site addCookie(String name, String value) { |
| 74 | defaultCookies.put(name, value); |
| 75 | return this; |
| 76 | } |
nothing calls this directly
no outgoing calls
no test coverage detected