This commit is contained in:
Germey 2021-06-27 21:08:58 +08:00
parent 18059bb1a8
commit c4082d4cd6
2 changed files with 4 additions and 11 deletions

View File

@@ -5,15 +5,11 @@ import pymongo
from pyquery import PyQuery as pq
from urllib.parse import urljoin
import multiprocessing
from urllib3.exceptions import InsecureRequestWarning
# Suppress only the single warning from urllib3 needed.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s')
BASE_URL = 'https://ssr2.scrape.center'
BASE_URL = 'https://ssr3.scrape.center'
TOTAL_PAGE = 10
MONGO_CONNECTION_STRING = 'mongodb://localhost:27017'
MONGO_DB_NAME = 'movies'
@@ -32,7 +28,7 @@ def scrape_page(url):
"""
logging.info('scraping %s...', url)
try:
response = requests.get(url, verify=False)
response = requests.get(url, auth=('admin', 'admin'))
if response.status_code == 200:
return response.text
logging.error('get invalid status code %s while scraping %s',

View File

@@ -6,14 +6,11 @@ import logging
import re
from urllib.parse import urljoin
import multiprocessing
from urllib3.exceptions import InsecureRequestWarning
# Suppress only the single warning from urllib3 needed.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s')
BASE_URL = 'https://ssr2.scrape.center'
BASE_URL = 'https://ssr3.scrape.center'
TOTAL_PAGE = 10
RESULTS_DIR = 'results'
@@ -28,7 +25,7 @@ def scrape_page(url):
"""
logging.info('scraping %s...', url)
try:
response = requests.get(url, verify=False)
response = requests.get(url, auth=('admin', 'admin'))
if response.status_code == 200:
return response.text
logging.error('get invalid status code %s while scraping %s',