Getting article publication dates using pygooglenews

  beautifulsoup, pygooglenews, python, web-scraping

I have written the following code to fetch company-wise news from the last 6 months from Google News, but I cannot find a generic way to extract the publication dates of the articles. Is there a way to do this?

pip install pygooglenews --upgrade
from pygooglenews import GoogleNews
# Google News client used for every search below (language: English, country: US).
gn = GoogleNews(lang='en', country='US')
import csv
from itertools import islice

# Load the search terms: the fourth column (index 3) of every row in the
# companies sheet, skipping the first two rows. The original read those two
# rows and then discarded them with two pop(0) calls; skipping them up front
# gives the same list without the O(n) pops and without failing on a file
# that has fewer than two rows.
# newline='' is what the csv module expects so that newlines embedded in
# quoted fields are parsed correctly. The `with` block closes the file, so no
# explicit close() is needed.
with open('/content/drive/MyDrive/Companies.csv', newline='') as f:
  csv_reader = csv.reader(f, delimiter=',')
  companies = [row[3] for row in islice(csv_reader, 2, None)]
#companies
# Parallel result lists: index i of each list describes the same article.
# NOTE(review): `requests`, `bs4` and `random_header` are used below but are
# not imported/defined in the visible part of this script - confirm they are
# provided by an earlier cell.
t_news = []
t_company = []
timestamp = []
for company in companies:
  # Search Google News for the company, restricted to the last six months.
  search = gn.search(company, when='6m')
  for item in search['entries']:
    # pygooglenews returns feedparser entries; the publication date of each
    # article is exposed as the 'published' string of the entry. Using .get()
    # records None when a feed item has no date, so the three lists always
    # stay the same length.
    published = item.get('published')

    # Fetch the article with a fresh session and a random header set, and
    # close the session (and its connections) as soon as the page is read.
    with requests.Session() as session:
      session.headers = random_header()
      res = session.get(item['link'])

    # The article body is approximated by the text of all <p> elements.
    parsed_article = bs4.BeautifulSoup(res.text, 'lxml')
    article_text = "".join(p.text for p in parsed_article.find_all('p'))

    t_news.append(article_text)
    t_company.append(company)
    timestamp.append(published)
    

    
import pandas as pd
# Assemble the collected columns into a table and write it out as CSV.
d = {
  "Company Code": t_company,
  "News": t_news,
  "Timestamp": timestamp,
}
df = pd.DataFrame(d)
df.to_csv('/content/drive/MyDrive/GoogleNews200921.csv')

Source: Python Questions

LEAVE A COMMENT