Reimplementation (#1)

* refactor into separate modules

* update README

Author: ulinja
Date: 2022-08-01 12:23:41 +02:00
Committed via GitHub
parent 5665759667
commit b69fd8cfa9

25 changed files with 1274 additions and 1323 deletions

net/__init__.py (new file, empty)

net/download.py (new file, 79 lines)

@@ -0,0 +1,79 @@
"""Library for downloading files from the web with CLI output."""
from pathlib import Path
import requests
from tqdm import tqdm
from cli.clibella import Printer
def download_file(
path_to_output_file,
url_to_file,
show_progress=False,
printer=None,
):
"""Downloads the file at the input URL to the specified path.
The file is downloaded via HTTP/HTTPS and saved to the specified path.
Optionally, displays a nice status bar.
Parameters
----------
path_to_output_file : str or pathlike object
Path to a file as which the downloaded file is saved.
url_to_file : str
URL to the file to be downloaded.
show_progress : bool
When True, a progress bar is displayed on StdOut indicating the
progress of the download.
printer : clibella.Printer
A clibella.Printer used to print CLI output.
"""
if '~' in str(path_to_output_file):
path_to_output_file = Path(path_to_output_file).expanduser()
path_to_output_file = Path(path_to_output_file).resolve()
if not path_to_output_file.parent.is_dir():
raise FileNotFoundError(
f"No such directory: '{path_to_output_file.parent}'."
)
if path_to_output_file.exists():
raise FileExistsError(
f"File already exists: '{path_to_output_file}'"
)
if printer is None:
p = Printer()
else:
p = printer
output_file_name = path_to_output_file.name
with open(path_to_output_file, "wb") as output_file:
p.info(f"Downloading '{output_file_name}'...")
file_response = requests.get(url_to_file, stream=True)
total_length = file_response.headers.get('content-length')
if total_length is None: # no content length header
output_file.write(file_response.content)
else:
if (show_progress):
total_length = int(total_length)
progress_bar = tqdm(
total=total_length,
unit="B",
unit_scale=True,
unit_divisor=1024
)
for data in file_response.iter_content(chunk_size=4096):
output_file.write(data)
if (show_progress):
progress_bar.update(len(data))
if (show_progress):
progress_bar.close()
p.ok(f"Received '{output_file_name}'.")

net/scrape.py (new file, 108 lines)

@@ -0,0 +1,108 @@
"""Methods for scraping the debian website for specific file URLs."""
from re import compile
import requests
from bs4 import BeautifulSoup
def get_debian_preseed_file_urls():
"""Returns a dict containing the URLs for the debian example preseed files.
The dict has the following structure:
{
"basic": {
"url": "https://...",
"name": "...",
},
"full": {
"url": "https://...",
"name": "...",
},
}
where "basic" points to the basic preseed file and its filename, and "full"
points to the full preseed file and its filename.
"""
preseed_file_urls = {
"basic": {
"url": "https://www.debian.org/releases/stable/example-preseed.txt",
"name": "example-preseed.txt",
},
"full": {
"url": "https://preseed.debian.net/debian-preseed/bullseye/amd64-main-full.txt",
"name": "amd64-main-full.txt",
},
}
return preseed_file_urls
def get_debian_iso_urls():
"""Retrieves a dict containing the URLs for a debian installation image.
The dict has the following structure:
{
"image_file": {
"url": "https://...",
"name": "debian-xx.x.x-amd64-netinst.iso",
},
"hash_file": {
"url": "https://...",
"name": "SHA512SUMS",
},
"signature_file": {
"url": "https://...",
"name": "SHA512SUMS.sign",
},
}
where "image_file" is points to the latest debian stable x86-64bit
net-installation ISO image, "hash_file" points to a SHA512SUMS file
containing the SHA512 checksum for the ISO file, and "signature_file"
points to a file containing a PGP signature for verification of the
SHA512SUMS file.
Each top-level dict entry contains a "name" key representing a file name,
and a "url" key specifying a URL to that file.
The function scrapes the official debian.org website to retrieve the URLs.
"""
# request the debian releases page
releases_url = "https://cdimage.debian.org/debian-cd/current/amd64/iso-cd/"
releases_page = requests.get(releases_url)
if not releases_page.status_code == 200:
raise RuntimeError("Unexpected status code during request.")
hash_file_name = "SHA512SUMS"
hash_file_url = releases_url + hash_file_name
signature_file_name = "SHA512SUMS.sign"
signature_file_url = releases_url + signature_file_name
# find the exact URL to the latest stable x64 netinst ISO file
soup = BeautifulSoup(releases_page.content, "html.parser")
image_file_links = soup.find_all(
name="a",
string=compile(r"debian-[0-9.]*-amd64-netinst.iso")
)
if len(image_file_links) != 1:
raise RuntimeError(
"Failed to find an exact match while looking for "
"a link to the latest debian image file."
)
image_file_name = image_file_links[0]['href']
image_file_url = releases_url + image_file_name
return {
"image_file": {
"url": image_file_url,
"name": image_file_name,
},
"hash_file": {
"url": hash_file_url,
"name": hash_file_name,
},
"signature_file": {
"url": signature_file_url,
"name": signature_file_name,
},
}
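
As a closing sketch of how the two new modules fit together, the snippet below downloads the netinst ISO, its SHA512SUMS file, and the matching PGP signature into the current working directory. It assumes the working directory is writable and that none of the target files exist yet (download_file raises FileExistsError otherwise); the loop itself is illustrative and not part of this commit.

    from net.download import download_file
    from net.scrape import get_debian_iso_urls

    # Scrape the current ISO/checksum/signature URLs, then fetch each file,
    # saving it under its upstream name in the current directory.
    for entry in get_debian_iso_urls().values():
        download_file(entry["name"], entry["url"], show_progress=True)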