"""Download every PDF linked from an announcements page.

Each file is saved in the current working directory as
``YYYYMMDD-<title>.pdf``, where the date and title are read from the
``div`` elements nested inside the PDF's ``<a>`` tag.
"""

import re
import sys
from datetime import datetime
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Seconds to wait on each HTTP request; requests has no default timeout,
# so without this a stalled server would block the script forever.
REQUEST_TIMEOUT = 30


def _announcement_filename(link):
    """Build ``YYYYMMDD-<title>.pdf`` from an announcement ``<a>`` tag.

    Args:
        link: A BeautifulSoup tag expected to contain ``div`` children with
            the classes ``announcement-name``, ``announcements-day`` and
            ``announcement-date-con`` (whose second nested ``div`` holds
            text of the form ``"<MonthName> <Year>"``).

    Returns:
        The target filename, or ``None`` when any expected element is
        missing or the month name cannot be parsed.
    """
    name_div = link.find('div', class_='announcement-name')
    day_div = link.find('div', class_='announcements-day')
    date_container = link.find('div', class_='announcement-date-con')
    if name_div is None or day_div is None or date_container is None:
        return None

    date_divs = date_container.find_all('div')
    if len(date_divs) < 2:
        return None

    month_year_parts = date_divs[1].text.split()
    if len(month_year_parts) < 2:
        return None
    month_name, year = month_year_parts[0], month_year_parts[1]

    try:
        # Convert the full month name (e.g. "March") to its number.
        month = datetime.strptime(month_name, '%B').month
    except ValueError:
        return None

    day = day_div.text.strip()
    # Path separators in the title would make open() fail or write outside
    # the working directory, so neutralise them.
    title = re.sub(r'[\\/]', '_', name_div.text.strip())

    formatted_date = f"{year}{month:02d}{day.zfill(2)}"  # Format as YYYYMMDD
    return f"{formatted_date}-{title}.pdf"


# NOTE(review): `url` is not defined in this snippet; it must be assigned
# (the address of the announcements page) before this point -- confirm.
response = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail fast: there is nothing useful to parse in an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

for link in soup.find_all('a'):
    href = link.get('href')
    # Anchors without an href yield None; skip them and any non-PDF link.
    if not href or not href.lower().endswith('.pdf'):
        continue

    new_filename = _announcement_filename(link)
    if new_filename is None:
        print(f"Skipping {href}: announcement details not found",
              file=sys.stderr)
        continue

    # urljoin resolves absolute, root-relative, protocol-relative and
    # page-relative hrefs correctly; plain concatenation does not.
    pdf_url = urljoin(url, href)
    pdf_response = requests.get(pdf_url, timeout=REQUEST_TIMEOUT)
    if not pdf_response.ok:
        # Do not save an HTML error page under a .pdf name.
        print(f"Skipping {pdf_url}: HTTP {pdf_response.status_code}",
              file=sys.stderr)
        continue

    with open(new_filename, 'wb') as f:
        f.write(pdf_response.content)