0
net/__init__.py
Normal file
0
net/__init__.py
Normal file
79
net/download.py
Normal file
79
net/download.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Library for downloading files from the web with CLI output."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
from cli.clibella import Printer
|
||||
|
||||
|
||||
def download_file(
    path_to_output_file,
    url_to_file,
    show_progress=False,
    printer=None,
):
    """Downloads the file at the input URL to the specified path.

    The file is downloaded via HTTP/HTTPS and streamed to the specified path
    in chunks, so the whole payload is never held in memory at once.
    Optionally, displays a nice status bar.

    Parameters
    ----------
    path_to_output_file : str or pathlike object
        Path to a file as which the downloaded file is saved.
        A leading '~' is expanded to the user's home directory.
    url_to_file : str
        URL to the file to be downloaded.
    show_progress : bool
        When True, a progress bar is displayed indicating the progress of
        the download. If the server does not send a content-length header,
        the bar is shown without a known total.
    printer : clibella.Printer
        A clibella.Printer used to print CLI output. When None, a new
        Printer is created.

    Raises
    ------
    FileNotFoundError
        If the parent directory of the output path does not exist.
    FileExistsError
        If the output path already exists.
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """

    # expanduser() leaves paths without a leading '~' untouched, so it can
    # be applied unconditionally.
    path_to_output_file = Path(path_to_output_file).expanduser().resolve()

    if not path_to_output_file.parent.is_dir():
        raise FileNotFoundError(
            f"No such directory: '{path_to_output_file.parent}'."
        )
    if path_to_output_file.exists():
        raise FileExistsError(
            f"File already exists: '{path_to_output_file}'"
        )

    p = Printer() if printer is None else printer
    output_file_name = path_to_output_file.name

    p.info(f"Downloading '{output_file_name}'...")

    # The response is used as a context manager so the connection is
    # released even if writing the file fails half-way through.
    with requests.get(url_to_file, stream=True) as file_response:
        # Fail before touching the file system instead of saving an HTTP
        # error page as if it were the requested file.
        file_response.raise_for_status()

        content_length = file_response.headers.get('content-length')
        if content_length is not None and content_length.isdigit():
            total_length = int(content_length)
        else:
            # no (usable) content length header: progress without a total
            total_length = None

        try:
            with open(path_to_output_file, "wb") as output_file, tqdm(
                total=total_length,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                disable=not show_progress,
            ) as progress_bar:
                for data in file_response.iter_content(chunk_size=4096):
                    output_file.write(data)
                    progress_bar.update(len(data))
        except BaseException:
            # Do not leave a truncated file behind: it would look like a
            # finished download and make a retry fail with FileExistsError.
            if path_to_output_file.exists():
                path_to_output_file.unlink()
            raise

    p.ok(f"Received '{output_file_name}'.")
|
||||
108
net/scrape.py
Normal file
108
net/scrape.py
Normal file
@@ -0,0 +1,108 @@
|
||||
"""Methods for scraping the debian website for specific file URLs."""
|
||||
|
||||
from re import compile
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def get_debian_preseed_file_urls():
    """Returns the URLs and file names of the debian example preseed files.

    The result is a dict with exactly two entries, "basic" and "full", one
    per preseed file variant. Each entry is itself a dict of the form
        {
            "url": "https://...",
            "name": "...",
        }
    where "url" is the location of the preseed file and "name" is its file
    name. The values are hard-coded; no network request is made.
    """

    basic_preseed = {
        "url": "https://www.debian.org/releases/stable/example-preseed.txt",
        "name": "example-preseed.txt",
    }
    full_preseed = {
        "url": "https://preseed.debian.net/debian-preseed/bullseye/amd64-main-full.txt",
        "name": "amd64-main-full.txt",
    }

    return {"basic": basic_preseed, "full": full_preseed}
|
||||
|
||||
|
||||
def get_debian_iso_urls():
    """Retrieves a dict containing the URLs for a debian installation image.

    The dict has the following structure:
        {
            "image_file": {
                "url": "https://...",
                "name": "debian-xx.x.x-amd64-netinst.iso",
            },
            "hash_file": {
                "url": "https://...",
                "name": "SHA512SUMS",
            },
            "signature_file": {
                "url": "https://...",
                "name": "SHA512SUMS.sign",
            },
        }
    where "image_file" points to the latest debian stable x86-64bit
    net-installation ISO image, "hash_file" points to a SHA512SUMS file
    containing the SHA512 checksum for the ISO file, and "signature_file"
    points to a file containing a PGP signature for verification of the
    SHA512SUMS file.
    Each top-level dict entry contains a "name" key representing a file name,
    and a "url" key specifying a URL to that file.

    The function scrapes the cdimage.debian.org directory listing to find
    the image file; the hash and signature file URLs are derived from the
    listing URL without being requested.

    Raises
    ------
    RuntimeError
        If the listing page does not answer with status code 200, or if the
        page does not contain exactly one link matching the image file name
        pattern.
    """

    # Local import: only this function needs it.
    from urllib.parse import urljoin

    # request the debian releases page
    releases_url = "https://cdimage.debian.org/debian-cd/current/amd64/iso-cd/"
    releases_page = requests.get(releases_url)
    if releases_page.status_code != 200:
        raise RuntimeError("Unexpected status code during request.")

    hash_file_name = "SHA512SUMS"
    hash_file_url = releases_url + hash_file_name
    signature_file_name = "SHA512SUMS.sign"
    signature_file_url = releases_url + signature_file_name

    # find the exact URL to the latest stable x64 netinst ISO file
    # NOTE: 'compile' is re.compile (imported at module level), not the
    # builtin. The dot before 'iso' is escaped so it only matches a literal
    # '.' instead of any character.
    soup = BeautifulSoup(releases_page.content, "html.parser")
    image_file_links = soup.find_all(
        name="a",
        string=compile(r"debian-[0-9.]*-amd64-netinst\.iso"),
    )
    if len(image_file_links) != 1:
        raise RuntimeError(
            "Failed to find an exact match while looking for "
            "a link to the latest debian image file."
        )

    # Resolve the href against the listing URL instead of concatenating
    # strings, so './name' or absolute hrefs still produce a valid URL.
    # For a plain relative href the result is identical to concatenation.
    image_file_url = urljoin(releases_url, image_file_links[0]['href'])
    image_file_name = image_file_url.rsplit("/", 1)[-1]

    return {
        "image_file": {
            "url": image_file_url,
            "name": image_file_name,
        },
        "hash_file": {
            "url": hash_file_url,
            "name": hash_file_name,
        },
        "signature_file": {
            "url": signature_file_url,
            "name": signature_file_name,
        },
    }
|
||||
Reference in New Issue
Block a user