Home > AI > Uncategorized

A Python script to download all PDFs linked from a web page

from datetime import datetime
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Download every announcement PDF linked from the listing page and save it in
# the current directory as "YYYYMMDD-<title>.pdf".

# Listing page whose <a> tags point at the announcement PDFs.
url = 'https://www.assetowl.com/investor-centre/asx-announcements'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# href=True skips anchors that have no href attribute; for those,
# link.get('href') is None and calling .endswith() on it would crash.
for link in soup.find_all('a', href=True):
    href = link['href']
    if not href.endswith('.pdf'):
        continue

    name_div = link.find('div', class_='announcement-name')
    day_div = link.find('div', class_='announcements-day')
    date_con = link.find('div', class_='announcement-date-con')
    if name_div is None or day_div is None or date_con is None:
        # A PDF link without the announcement markup cannot be named; skip it
        # rather than crash on None.text.
        continue

    date_divs = date_con.find_all('div')
    if len(date_divs) < 2:
        # The second nested <div> is the one read for "<Month> <Year>".
        continue

    title = name_div.text.strip()
    day = day_div.text.strip()
    month_year = date_divs[1].text.strip()

    month_name = month_year.split()[0]
    year = month_year.split()[1]
    month = datetime.strptime(month_name, '%B').month  # Convert month name to number

    formatted_date = f"{year}{month:02d}{day.zfill(2)}"  # Format as YYYYMMDD
    # The title comes from the remote page: replace path separators so it
    # cannot break open() or place the file outside the current directory.
    safe_title = title.replace('/', '_').replace('\\', '_')
    new_filename = f"{formatted_date}-{safe_title}.pdf"

    # urljoin resolves relative and root-relative hrefs against the page URL
    # and returns absolute hrefs unchanged; plain string concatenation would
    # produce a wrong URL for anything but an absolute href.
    pdf_url = urljoin(url, href)
    pdf_response = requests.get(pdf_url, timeout=30)
    # Do not save an HTML error page under a .pdf name.
    pdf_response.raise_for_status()

    with open(new_filename, 'wb') as f:
        f.write(pdf_response.content)

Related posts:

Leave a Reply