Spaces:
Sleeping
Sleeping
import requests | |
import os | |
from typing import Optional | |
from urllib.parse import urlparse, unquote | |
import gdrive | |
def _safe_basename(name: str) -> str:
    """Return only the final path component of *name*, or '' if none is usable."""
    # Normalise backslashes so Windows-style separators are stripped as well.
    base = os.path.basename(name.replace('\\', '/'))
    return '' if base in ('.', '..') else base


def get_filename_from_url(url: str, cd: Optional[str]) -> str:
    """
    Extract a filename from a Content-Disposition header value or from the URL.

    Args:
        url: The URL the file is fetched from; its path is the fallback source.
        cd: The raw Content-Disposition header value, or None if absent.

    Returns:
        The bare filename (never containing directory components). The
        extended ``filename*=`` parameter is preferred over ``filename=``;
        if neither yields a usable name, the percent-decoded last segment of
        the URL path is returned. The result is an empty string when the URL
        path has no final segment (e.g. it ends with '/').
    """
    if cd:
        plain = ''
        extended = ''
        for part in cd.split(';'):
            # partition() splits on the first '=' only, so names that contain
            # '=' themselves are kept intact.
            key, sep, value = part.strip().partition('=')
            if not sep:
                continue
            key = key.strip().lower()  # parameter names are case-insensitive
            value = value.strip().strip('"')
            if key == 'filename*' and not extended:
                # Extended form: charset'language'percent-encoded-value
                pieces = value.split("'", 2)
                if len(pieces) == 3:
                    charset, encoded = pieces[0], pieces[2]
                else:
                    charset, encoded = '', value
                try:
                    extended = unquote(encoded, encoding=charset or 'utf-8', errors='strict')
                except (LookupError, UnicodeDecodeError):
                    # Unknown charset or bad bytes: fall back to lenient UTF-8.
                    extended = unquote(encoded)
            elif key == 'filename' and not plain:
                plain = unquote(value)
        for candidate in (extended, plain):
            # The header is server-controlled: drop any directory parts so the
            # name cannot escape the directory it is later joined to.
            name = _safe_basename(candidate)
            if name:
                return name
    # Fallback to extracting filename from URL
    return _safe_basename(unquote(urlparse(url).path))
def download_file(url: str, save_dir: Optional[str] = None, save_name: Optional[str] = None, timeout: float = 30.0) -> None:
    """
    Download a file from the given URL and save it to disk.

    Args:
        url: The URL to fetch.
        save_dir: Directory to save into; created if it does not exist. When
            omitted, the file is written relative to the current directory.
        save_name: Filename to use. When omitted, the name is derived from
            the Content-Disposition header or the URL.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the call indefinitely.

    Errors are reported with print() rather than raised.
    """
    try:
        # Using the response as a context manager releases the streamed
        # connection even if writing the file fails part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            if save_name:
                filename = save_name
            else:
                derived = get_filename_from_url(url, response.headers.get('content-disposition'))
                # The derived name is server-controlled: keep only the last
                # path component so it cannot escape save_dir.
                filename = os.path.basename(derived.replace('\\', '/'))
                if filename in ('', '.', '..'):
                    # No usable name in the header or URL; use a placeholder
                    # instead of trying to open a directory path.
                    filename = 'downloaded_file'
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
                file_path = os.path.join(save_dir, filename)
            else:
                file_path = filename
            with open(file_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        print(f"File downloaded and saved as: {file_path}")
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error: {err}")
    except Exception as e:
        print(f"Error: {e}")
if __name__ == "__main__":
    # Demo run: fetch one sample document into the local "data" directory.
    url = "https://llamahack.slack.com/files/U069A8NRB9T/F068ZTLK9KR/anthem_hsa_medical_insurance_benefit_booklet.pdf"
    download_file(url, save_dir="data")