From 98518e444a47b2a7b727a8425cf97c995860c162 Mon Sep 17 00:00:00 2001 From: Anton Hvornum Date: Fri, 2 Aug 2024 15:24:44 +0200 Subject: [PATCH] Use `/mirrors/status/json/` instead of `/mirrorlist/` to get mirror lists (#2599) * Using JSON endpoint instead of ASCII endpoint for mirror listing, as the JSON endpoint is cached and easier to parse * Added a TODO to handle unknown regional mirrors (which lack info in the backend) * Filtered out 'bad' mirrors. Also added a sorting mechanism that uses the mirror's 'score' rather than just the URL name. This will emulate the reflector.service/rankmirrors behavior and thus reduce the need to re-rank the mirrors. * Added the ability to sort mirrors via latency or download speed using sorted(mirror_list, key=lambda mirror: (mirror.score, mirror.speed)) - but I have not implemented the sorting via the menu yet, and I have not integrated the new MirrorStatus model into the handling of URLs. I still need to figure out where the {region: [url, url]} is being used, so that I can convert to {region: [mirror.url, mirror.url]} logic. * Converting MirrorStatus model to {mirror: [url, url]} * Added debug information for /var/log/archinstall/install.log * Fixing flake8 * Fixed issue where 'dead' mirrors have no score, and thus can't be rounded with round() * Forgot to return model validation data after validation * Improving debug/info output * Reverting change in #2350 - Writing over instead of appending to mirrorlist * Mirror URLs reported by the JSON endpoint do not contain the repo format, only the base location for the mirror. So we have to adjust for this. * pydantic did not honor 'private' variables in 'before' model validator, had to change to 'after' instead. * Sorted out mypy typing matching the new MirrorStatus model * Added pydantic as a dependency, it's time! * Updated workflow to include pydantic * Added return values from model @property decorators. 
--- .flake8 | 3 +- .github/workflows/python-build.yml | 2 +- .gitignore | 3 + PKGBUILD | 1 + archinstall/lib/exceptions.py | 6 ++ archinstall/lib/installer.py | 2 +- archinstall/lib/mirrors.py | 70 +++++++++++------- archinstall/lib/models/mirrors.py | 91 +++++++++++++++++++++++ archinstall/lib/networking.py | 112 ++++++++++++++++++++++++++++- pyproject.toml | 1 + 10 files changed, 263 insertions(+), 28 deletions(-) create mode 100644 archinstall/lib/models/mirrors.py diff --git a/.flake8 b/.flake8 index 583b58cf..2235ad9b 100644 --- a/.flake8 +++ b/.flake8 @@ -1,10 +1,11 @@ [flake8] count = True # Several of the following could be autofixed or improved by running the code through psf/black -ignore = E123,E126,E128,E203,E231,E261,E302,E402,E722,F541,W191,W292,W293,W503,W504 +ignore = E123,E126,E128,E203,E227,E231,E261,E302,E402,E722,F541,W191,W292,W293,W503,W504 max-complexity = 40 max-line-length = 236 show-source = True statistics = True builtins = _ per-file-ignores = __init__.py:F401,F403,F405 simple_menu.py:C901,W503 guided.py:C901 network_configuration.py:F821 +exclude = .git,__pycache__,docs,actions-runner \ No newline at end of file diff --git a/.github/workflows/python-build.yml b/.github/workflows/python-build.yml index 483e451b..59099614 100644 --- a/.github/workflows/python-build.yml +++ b/.github/workflows/python-build.yml @@ -17,7 +17,7 @@ jobs: pacman-key --init pacman --noconfirm -Sy archlinux-keyring pacman --noconfirm -Syyu - pacman --noconfirm -Sy python-pip python-pyparted python-simple-term-menu pkgconfig gcc + pacman --noconfirm -Sy python-pip python-pydantic python-pyparted python-simple-term-menu pkgconfig gcc - name: Install build dependencies run: | python -m pip install --break-system-packages --upgrade pip diff --git a/.gitignore b/.gitignore index a6075cc4..b03bd3a5 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,6 @@ venv /*.sig /*.json requirements.txt +/.gitconfig +/actions-runner +/cmd_output.txt \ No newline at end of file 
diff --git a/PKGBUILD b/PKGBUILD index 3c6043c6..7a8affea 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -22,6 +22,7 @@ depends=( 'pciutils' 'procps-ng' 'python' + 'python-pydantic' 'python-pyparted' 'python-simple-term-menu' 'systemd' diff --git a/archinstall/lib/exceptions.py b/archinstall/lib/exceptions.py index 80926e0b..a00b3c85 100644 --- a/archinstall/lib/exceptions.py +++ b/archinstall/lib/exceptions.py @@ -38,3 +38,9 @@ class PackageError(Exception): class Deprecated(Exception): pass + + +class DownloadTimeout(Exception): + ''' + Download timeout exception raised by DownloadTimer. + ''' \ No newline at end of file diff --git a/archinstall/lib/installer.py b/archinstall/lib/installer.py index d3c90875..852d302b 100644 --- a/archinstall/lib/installer.py +++ b/archinstall/lib/installer.py @@ -493,7 +493,7 @@ class Installer: if mirrorlist_config: debug(f'Mirrorlist: {mirrorlist_config}') - with local_mirrorlist_conf.open('a') as fp: + with local_mirrorlist_conf.open('w') as fp: fp.write(mirrorlist_config) def genfstab(self, flags: str = '-pU'): diff --git a/archinstall/lib/mirrors.py b/archinstall/lib/mirrors.py index c9094669..934c306d 100644 --- a/archinstall/lib/mirrors.py +++ b/archinstall/lib/mirrors.py @@ -1,3 +1,4 @@ +import json import pathlib from dataclasses import dataclass, field from enum import Enum @@ -7,6 +8,7 @@ from .menu import AbstractSubMenu, Selector, MenuSelectionType, Menu, ListManage from .networking import fetch_data_from_url from .output import warn, FormattedOutput from .storage import storage +from .models.mirrors import MirrorStatusListV3, MirrorStatusEntryV3 if TYPE_CHECKING: _: Any @@ -270,7 +272,11 @@ def select_mirror_regions(preset_values: Dict[str, List[str]] = {}) -> Dict[str, case MenuSelectionType.Skip: return preset_values case MenuSelectionType.Selection: - return {selected: mirrors[selected] for selected in choice.multi_value} + return { + selected: [ + f"{mirror.url}$repo/os/$arch" for mirror in 
sort_mirrors_by_performance(mirrors[selected]) + ] for selected in choice.multi_value + } return {} @@ -280,39 +286,55 @@ def select_custom_mirror(prompt: str = '', preset: List[CustomMirror] = []): return custom_mirrors -def _parse_mirror_list(mirrorlist: str) -> Dict[str, List[str]]: - file_content = mirrorlist.split('\n') - file_content = list(filter(lambda x: x, file_content)) # filter out empty lines - first_srv_idx = [idx for idx, line in enumerate(file_content) if 'server' in line.lower()][0] - mirrors = file_content[first_srv_idx - 1:] - - mirror_list: Dict[str, List[str]] = {} - - for idx in range(0, len(mirrors), 2): - region = mirrors[idx].removeprefix('## ') - url = mirrors[idx + 1].removeprefix('#').removeprefix('Server = ') - mirror_list.setdefault(region, []).append(url) - - return mirror_list +def sort_mirrors_by_performance(mirror_list :List[MirrorStatusEntryV3]) -> List[MirrorStatusEntryV3]: + return sorted(mirror_list, key=lambda mirror: (mirror.score, mirror.speed)) -def list_mirrors() -> Dict[str, List[str]]: - regions: Dict[str, List[str]] = {} +def _parse_mirror_list(mirrorlist: str) -> Dict[str, List[MirrorStatusEntryV3]]: + mirror_status = MirrorStatusListV3(**json.loads(mirrorlist)) + + sorting_placeholder: Dict[str, List[MirrorStatusEntryV3]] = {} + + for mirror in mirror_status.urls: + # We filter out mirrors that have bad criteria values + if any([ + mirror.active is False, # Disabled by mirror-list admins + mirror.last_sync is None, # Has not synced recently + # mirror.score (error rate) over time reported from backend: https://github.com/archlinux/archweb/blob/31333d3516c91db9a2f2d12260bd61656c011fd1/mirrors/utils.py#L111C22-L111C66 + (mirror.score is None or mirror.score >= 100), + ]): + continue + + if mirror.country == "": + # TODO: This should be removed once RFC!29 is merged and completed + # Until then, there are mirrors which lacks data in the backend + # and there is no way of knowing where they're located. 
+ # So we have to assume world-wide + mirror.country = "Worldwide" + + if mirror.url.startswith('http'): + sorting_placeholder.setdefault(mirror.country, []).append(mirror) + + sorted_by_regions: Dict[str, List[MirrorStatusEntryV3]] = dict({ + region: unsorted_mirrors + for region, unsorted_mirrors in sorted(sorting_placeholder.items(), key=lambda item: item[0]) + }) + + return sorted_by_regions + + +def list_mirrors() -> Dict[str, List[MirrorStatusEntryV3]]: + regions: Dict[str, List[MirrorStatusEntryV3]] = {} if storage['arguments']['offline']: with pathlib.Path('/etc/pacman.d/mirrorlist').open('r') as fp: mirrorlist = fp.read() else: - url = "https://archlinux.org/mirrorlist/?protocol=https&protocol=http&ip_version=4&ip_version=6&use_mirror_status=on" + url = "https://archlinux.org/mirrors/status/json/" try: mirrorlist = fetch_data_from_url(url) except ValueError as err: warn(f'Could not fetch an active mirror-list: {err}') return regions - regions = _parse_mirror_list(mirrorlist) - sorted_regions = {} - for region, urls in regions.items(): - sorted_regions[region] = sorted(urls, reverse=True) - - return sorted_regions + return _parse_mirror_list(mirrorlist) \ No newline at end of file diff --git a/archinstall/lib/models/mirrors.py b/archinstall/lib/models/mirrors.py new file mode 100644 index 00000000..dda174d2 --- /dev/null +++ b/archinstall/lib/models/mirrors.py @@ -0,0 +1,91 @@ +import datetime +import pydantic +import urllib.parse +import urllib.request +from typing import ( + Dict, + List +) + +from ..networking import ping, DownloadTimer +from ..output import info, debug + + +class MirrorStatusEntryV3(pydantic.BaseModel): + url :str + protocol :str + active :bool + country :str + country_code :str + isos :bool + ipv4 :bool + ipv6 :bool + details :str + delay :int|None = None + last_sync :datetime.datetime|None = None + duration_avg :float|None = None + duration_stddev :float|None = None + completion_pct :float|None = None + score :int|None = None + 
_latency :float|None = None + _speed :float|None = None + _hostname :str|None = None + _port :int|None = None + + @property + def speed(self) -> float|None: + if self._speed is None: + info(f"Checking download speed of {self._hostname}[{self.score}] by fetching: {self.url}core/os/x86_64/core.db") + req = urllib.request.Request(url=f"{self.url}core/os/x86_64/core.db") + with urllib.request.urlopen(req, None, 5) as handle, DownloadTimer(timeout=5) as timer: + size = len(handle.read()) + + self._speed = size / timer.time + debug(f" speed: {self._speed} ({int(self._speed / 1024 / 1024 * 100) / 100}MiB/s)") + + return self._speed + + @property + def latency(self) -> float|None: + """ + Latency measures the miliseconds between one ICMP request & response. + It only does so once because we check if self._latency is None, and a ICMP timeout result in -1 + We do this because some hosts blocks ICMP so we'll have to rely on .speed() instead which is slower. + """ + if self._latency is None: + info(f"Checking latency for {self.url}") + self._latency = ping(self._hostname, timeout=2) + debug(f" latency: {self._latency}") + + return self._latency + + @pydantic.field_validator('score', mode='before') + def validate_score(cls, value) -> int|None: + if value is not None: + value = round(value) + debug(f" score: {value}") + + return value + + @pydantic.model_validator(mode='after') + def debug_output(self, validation_info) -> 'MirrorStatusEntryV3': + self._hostname, *_port = urllib.parse.urlparse(self.url).netloc.split(':', 1) + self._port = int(_port[0]) if _port and len(_port) >= 1 else None + + debug(f"Loaded mirror {self._hostname}" + (f" with current score of {round(self.score)}" if self.score else '')) + return self + +class MirrorStatusListV3(pydantic.BaseModel): + cutoff :int + last_check :datetime.datetime + num_checks :int + urls :List[MirrorStatusEntryV3] + version :int + + @pydantic.model_validator(mode='before') + @classmethod + def check_model(cls, data: Dict[str, 
int|datetime.datetime|List[MirrorStatusEntryV3]]) -> Dict[str, int|datetime.datetime|List[MirrorStatusEntryV3]]: + if data.get('version') == 3: + return data + + raise ValueError(f"MirrorStatusListV3 only accepts version 3 data from https://archlinux.org/mirrors/status/json/") \ No newline at end of file diff --git a/archinstall/lib/networking.py b/archinstall/lib/networking.py index fb26bd3d..cb20337d 100644 --- a/archinstall/lib/networking.py +++ b/archinstall/lib/networking.py @@ -2,15 +2,68 @@ import os import socket import ssl import struct +import time +import select +import signal +import random from typing import Union, Dict, Any, List, Optional from urllib.error import URLError from urllib.parse import urlencode from urllib.request import urlopen -from .exceptions import SysCallError +from .exceptions import SysCallError, DownloadTimeout from .output import error, info from .pacman import Pacman +class DownloadTimer(): + ''' + Context manager for timing downloads with timeouts. + ''' + def __init__(self, timeout=5): + ''' + Args: + timeout: + The download timeout in seconds. The DownloadTimeout exception + will be raised in the context after this many seconds. + ''' + self.time = None + self.start_time = None + self.timeout = timeout + self.previous_handler = None + self.previous_timer = None + + def raise_timeout(self, signl, frame): + ''' + Raise the DownloadTimeout exception. 
+ ''' + raise DownloadTimeout(f'Download timed out after {self.timeout} second(s).') + + def __enter__(self): + if self.timeout > 0: + self.previous_handler = signal.signal(signal.SIGALRM, self.raise_timeout) + self.previous_timer = signal.alarm(self.timeout) + + self.start_time = time.time() + return self + + def __exit__(self, typ, value, traceback): + if self.start_time: + time_delta = time.time() - self.start_time + signal.alarm(0) + self.time = time_delta + if self.timeout > 0: + signal.signal(signal.SIGALRM, self.previous_handler) + + previous_timer = self.previous_timer + if previous_timer and previous_timer > 0: + remaining_time = int(previous_timer - time_delta) + # The alarm should have been raised during the download. + if remaining_time <= 0: + signal.raise_signal(signal.SIGALRM) + else: + signal.alarm(remaining_time) + self.start_time = None + def get_hw_addr(ifname :str) -> str: import fcntl @@ -81,3 +134,60 @@ def fetch_data_from_url(url: str, params: Optional[Dict] = None) -> str: return data except URLError: raise ValueError(f'Unable to fetch data from url: {url}') + + +def calc_checksum(icmp_packet): + # Calculate the ICMP checksum + checksum = 0 + for i in range(0, len(icmp_packet), 2): + checksum += (icmp_packet[i] << 8) + ( + struct.unpack('B', icmp_packet[i + 1:i + 2])[0] + if len(icmp_packet[i + 1:i + 2]) else 0 + ) + + checksum = (checksum >> 16) + (checksum & 0xFFFF) + checksum = ~checksum & 0xFFFF + + return checksum + +def build_icmp(payload): + # Define the ICMP Echo Request packet + icmp_packet = struct.pack('!BBHHH', 8, 0, 0, 0, 1) + payload + + checksum = calc_checksum(icmp_packet) + + return struct.pack('!BBHHH', 8, 0, checksum, 0, 1) + payload + +def ping(hostname, timeout=5): + watchdog = select.epoll() + started = time.time() + random_identifier = f'archinstall-{random.randint(1000, 9999)}'.encode() + + # Create a raw socket (requires root, which should be fine on archiso) + icmp_socket = socket.socket(socket.AF_INET, 
socket.SOCK_RAW, socket.IPPROTO_ICMP) + watchdog.register(icmp_socket, select.EPOLLIN | select.EPOLLHUP) + + icmp_packet = build_icmp(random_identifier) + + # Send the ICMP packet + icmp_socket.sendto(icmp_packet, (hostname, 0)) + latency = -1 + + # Gracefully wait for X amount of time + # for a ICMP response or exit with no latency + while latency == -1 and time.time() - started < timeout: + try: + for fileno, event in watchdog.poll(0.1): + response, _ = icmp_socket.recvfrom(1024) + icmp_type = struct.unpack('!B', response[20:21])[0] + + # Check if it's an Echo Reply (ICMP type 0) + if icmp_type == 0 and response[-len(random_identifier):] == random_identifier: + latency = round((time.time() - started) * 1000) + break + except socket.error as error: + print(f"Error: {error}") + break + + icmp_socket.close() + return latency \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 5428fac6..4637a0c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ classifiers = [ dependencies = [ "simple-term-menu==1.6.4", "pyparted @ https://github.com//dcantrell/pyparted/archive/v3.13.0.tar.gz#sha512=26819e28d73420937874f52fda03eb50ab1b136574ea9867a69d46ae4976d38c4f26a2697fa70597eed90dd78a5ea209bafcc3227a17a7a5d63cff6d107c2b11", + "pydantic==2.8.2" ] [project.urls]