# NOTE: stray editor line numbers removed (copy/paste extraction artifact)
import requests
import pandas as pd
from bs4 import BeautifulSoup
import multiprocessing as mp
def news_craw(topic, count=5):
    """Scrape headline entries from a Google News topic page (zh-TW locale).

    Parameters:
        topic: Google News topic id — the path segment after ``/topics/``.
        count: maximum number of headlines to return (default 5).

    Returns:
        A list of dicts, each with ``'title'`` and ``'link'`` (absolute URL).

    Raises:
        requests.HTTPError: if the topic page request fails.
        requests.Timeout: if the server does not respond within 10 seconds.
    """
    google_news = 'https://news.google.com'
    url = f'{google_news}/topics/{topic}?hl=zh-TW&gl=TW&ceid=TW%3Azh-Hant'
    # Timeout so a dead connection can't hang forever; fail loudly on HTTP errors
    # instead of silently parsing an error page.
    res = requests.get(url, timeout=10)
    res.raise_for_status()
    # Pass the decoded text directly: re-encoding it to bytes (as the original
    # did) just made BeautifulSoup sniff the charset a second time.
    soup = BeautifulSoup(res.text, "html.parser")
    # Run the CSS query ONCE, outside the loop. The original re-ran two
    # separate selects on every iteration and indexed h3's and a's
    # independently, which could pair a title with the wrong link if any
    # h3 lacked an anchor — and raised IndexError when fewer than 5
    # results were present.
    news = []
    for h3 in soup.select('div.xrnccd h3')[:count]:  # slice tolerates short pages
        anchor = h3.find('a')
        if anchor is None or not anchor.get('href'):
            continue  # skip malformed entries rather than crash
        news.append({
            'title': h3.text,
            # hrefs are relative like './articles/...'; strip the leading '.'
            'link': google_news + anchor['href'][1:],
        })
    return news