Home > AI > Uncategorized

One script to download all PDFs from a web page

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from datetime import datetime
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
 
# Download every PDF linked from the announcements page at `url` and save each
# one in the current directory as "YYYYMMDD-<title>.pdf".
#
# NOTE(review): `url` is not defined in this snippet; it must be set to the
# page address before this code runs.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Fail early instead of parsing an error page
soup = BeautifulSoup(response.text, 'html.parser')

# href=True skips anchors that have no href attribute at all.
for link in soup.find_all('a', href=True):
    href = link['href']
    if not href.lower().endswith('.pdf'):
        continue

    title_div = link.find('div', class_='announcement-name')
    day_div = link.find('div', class_='announcements-day')
    date_container = link.find('div', class_='announcement-date-con')
    if title_div is None or day_div is None or date_container is None:
        # PDF link that is not laid out as an announcement card; skip it
        # rather than abort the whole run.
        continue

    date_divs = date_container.find_all('div')
    if len(date_divs) < 2:
        continue

    title = title_div.text.strip()
    day = day_div.text.strip()
    # The second nested div holds text such as "March 2024".
    month_year = date_divs[1].text.strip()

    try:
        month_name, year = month_year.split()[:2]
        # Convert the full month name to its number
        month = datetime.strptime(month_name, '%B').month
    except ValueError:
        # Date text is missing a part or uses an unrecognised month name.
        continue

    formatted_date = f"{year}{month:02d}{day.zfill(2)}"  # Format as YYYYMMDD

    # Path separators in the title would make open() fail or write the file
    # into a different directory.
    safe_title = title.replace('/', '-').replace('\\', '-')
    new_filename = f"{formatted_date}-{safe_title}.pdf"

    # urljoin resolves absolute, root-relative and page-relative hrefs alike.
    pdf_url = urljoin(url, href)
    pdf_response = requests.get(pdf_url, timeout=30)
    pdf_response.raise_for_status()  # Never save an HTTP error body as a PDF

    with open(new_filename, 'wb') as f:
        f.write(pdf_response.content)

Related posts:

Leave a Reply