Home > AI > Uncategorized

Download TLS announcements

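A Python script that bulk-downloads TLS announcements: for each row of an Excel sheet it takes the date from column A and the hyperlinked title from column B, then saves the linked PDF under a name built from that date and title.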

import pandas as pd
import requests
import os
import openpyxl
import re
from datetime import datetime  



def generate_filename(date, title):
    """Build a filesystem-safe file name of the form ``<date>-<title>``.

    Args:
        date: Date text such as ``"2024-01-31"``. Hyphens are dropped, so the
            example becomes ``"20240131"``. Non-string values are converted
            with ``str()``.
        title: Announcement title. ``None`` is treated as an empty title;
            other non-string values are converted with ``str()``.

    Returns:
        The combined name with reserved file-name characters removed from
        both parts. No file extension is appended.
    """
    # Characters that are not allowed in Windows file names. The slash and
    # backslash would also act as directory separators once the name is
    # joined to a folder path.
    forbidden = r'[<>:"/\\|?*]'
    # Sanitize the date too: a text date such as "31/01/2024" would otherwise
    # inject path separators into the file name.
    formatted_date = re.sub(forbidden, '', str(date).replace('-', ''))
    # An empty or numeric spreadsheet cell yields None / a number, which
    # re.sub would reject with a TypeError.
    clean_title = re.sub(forbidden, '', '' if title is None else str(title))
    return f"{formatted_date}-{clean_title}"


def download_pdf(output_folder, url, filename, timeout=30):
    """Download *url* and save the response body as *filename* in *output_folder*.

    Failures are reported with ``print`` instead of being raised, so a caller
    looping over many URLs can carry on with the next one.

    Args:
        output_folder: Directory to write into; created if it does not exist.
        url: Address to fetch with an HTTP GET.
        filename: Name of the file to create inside ``output_folder``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` if the file was written, ``False`` otherwise.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server
        # and the whole batch would hang on one bad link.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")
        return False

    if response.status_code != 200:
        print(f"Failed to download {url}: Status code {response.status_code}")
        return False

    file_path = os.path.join(output_folder, filename)
    try:
        # Create the target folder on first use rather than failing every
        # download when it is missing.
        os.makedirs(output_folder, exist_ok=True)
        with open(file_path, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        print(f"Error downloading {url}: {e}")
        return False

    print(f"Downloaded: {filename}")
    return True




# Workbook to read, the sheet inside it, and the folder downloads go to.
excel_file = 'Book8.xlsx'
sheet_name = 'Sheet1'
output_folder = '/Users/dph/documents/work-Stock/ASX-TLS/c'


def main():
    """Download every file hyperlinked from column B of the configured sheet.

    Row 1 is skipped as a header. For each later row, column A supplies the
    date and column B supplies the title and its hyperlink. Rows whose title
    cell has no hyperlink target are skipped.
    """
    wb = openpyxl.load_workbook(excel_file)
    # Open the configured sheet by name; the workbook's active sheet is
    # whichever tab was selected when the file was last saved.
    sheet = wb[sheet_name]

    for date_cell, title_cell in sheet.iter_rows(min_row=2, min_col=1, max_col=2):
        link = title_cell.hyperlink
        # A hyperlink that points inside the workbook has no external target.
        if not link or not link.target:
            continue

        raw_date = date_cell.value
        if isinstance(raw_date, datetime):
            date = raw_date.strftime('%Y-%m-%d')
        else:
            date = str(raw_date)

        filename = generate_filename(date, title_cell.value)
        download_pdf(output_folder, link.target, filename)

    print("All downloads complete.")


if __name__ == "__main__":
    main()

Related posts:

Leave a Reply