# Script to download TLS announcements in bulk, using the hyperlinks stored in an Excel workbook.
import pandas as pd
import requests
import os
import openpyxl
import re
from datetime import datetime
def generate_filename(date, title):
    """Build a filesystem-safe file name from an announcement date and title.

    Args:
        date: Date text such as "2024-01-15". Hyphens are removed; any other
            value is converted with ``str()`` first.
        title: Announcement title. Non-string values (for example a numeric
            cell value) are converted with ``str()``; ``None`` is treated as
            an empty title.

    Returns:
        The string "<date>-<title>" with the forbidden characters stripped
        from both parts. No file extension is appended.
    """
    # Characters removed from the name:  < > : " / \ | ? *
    forbidden = r'[<>:"/\\|?*]'

    # The date is sanitised as well: a value like "15/01/2024" or one that
    # carries a time ("10:30:00") would otherwise inject path separators or
    # colons into the file name.
    formatted_date = re.sub(forbidden, '', str(date).replace('-', ''))

    title_text = '' if title is None else str(title)
    clean_title = re.sub(forbidden, '', title_text)

    return f"{formatted_date}-{clean_title}"
def download_pdf(output_folder, url, filename, timeout=30):
    """Download ``url`` and save the response body to ``output_folder/filename``.

    Failures are reported with ``print`` rather than raised, so a caller
    looping over many URLs keeps going after a bad one.

    Args:
        output_folder: Directory to write into; created if it does not exist.
        url: Address to fetch with an HTTP GET.
        filename: Name of the file to create inside ``output_folder``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the run forever.

    Returns:
        True if the file was written, False if the request failed, the
        server answered with a status other than 200, or the file could not
        be written.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")
        return False

    if response.status_code != 200:
        print(f"Failed to download {url}: Status code {response.status_code}")
        return False

    try:
        os.makedirs(output_folder, exist_ok=True)
        file_path = os.path.join(output_folder, filename)
        with open(file_path, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        print(f"Error downloading {url}: {e}")
        return False

    print(f"Downloaded: {filename}")
    return True
# Configuration: workbook to read, sheet holding the announcements, and the
# directory the downloaded files are written to.
excel_file = 'Book8.xlsx'
sheet_name = 'Sheet1'
output_folder = '/Users/dph/documents/work-Stock/ASX-TLS/c'

# Create the destination once up front so individual downloads do not fail
# just because the folder is missing.
os.makedirs(output_folder, exist_ok=True)

# The download links are stored as cell hyperlinks, which are read through
# openpyxl. Open the sheet named in the configuration instead of whichever
# sheet happens to be active in the saved workbook.
wb = openpyxl.load_workbook(excel_file)
sheet = wb[sheet_name]

# Start at row 2 (row 1 is skipped — presumably a header row; confirm against
# the workbook). Column A holds the date, column B the hyperlinked title.
for date_cell, title_cell in sheet.iter_rows(min_row=2, min_col=1, max_col=2):
    if not title_cell.hyperlink:
        continue  # nothing to download for this row

    url = title_cell.hyperlink.target

    date_value = date_cell.value
    if isinstance(date_value, datetime):
        date = date_value.strftime('%Y-%m-%d')
    else:
        date = str(date_value)

    # A hyperlinked cell may be empty or hold a number; normalise to text so
    # building the file name cannot fail on a non-string title.
    title_value = title_cell.value
    title = '' if title_value is None else str(title_value)

    filename = generate_filename(date, title)
    download_pdf(output_folder, url, filename)

print("All downloads complete.")