Use `/mirrors/status/json/` instead of `/mirrorlist/` to get mirror lists (#2599)

* Using JSON endpoint instead of ASCII endpoint for mirror listing, as the JSON endpoint is cached and easier to parse

* Added a TODO to handle unknown regional mirrors (which lack info in the backend)

* Filtered out 'bad' mirrors. Also added a sorting mechanism that uses each mirror's 'score' rather than just the URL name. This emulates the reflector.service/rankmirrors behavior and thus reduces the need to re-rank the mirrors.

* Added the ability to sort mirrors via latency or download speed using sorted(mirror_list, key=lambda mirror: (mirror.score, mirror.speed)) - but I have not implemented the sorting via the menu yet, and I have not integrated the new MirrorStatus model into the handling of URLs. I still need to figure out where the {region: [url, url]} is being used, so that I can convert to {region: [mirror.url, mirror.url]} logic.

* Converting MirrorStatus model to {mirror: [url, url]}

* Added debug information for /var/log/archinstall/install.log

* Fixing flake8

* Fixed issue where 'dead' mirrors have no score, and thus can't be round():ed

* Forgot to return model validation data after validation

* Improving debug/info output

* Reverting change in #2350 - Writing over instead of appending to mirrorlist

* Mirror URLs reported by the JSON endpoint do not contain the repo format, only the base location for the mirror. So we have to adjust for this.

* pydantic did not honor 'private' variables in 'before' model validator, had to change to 'after' instead.

* Sorted out mypy typing matching the new MirrorStatus model

* Added pydantic as a dependency, it's time!

* Updated workflow to include pydantic

* Added return values from model @property decorators.
This commit is contained in:
Anton Hvornum 2024-08-02 15:24:44 +02:00 committed by GitHub
parent 4af3b02707
commit 98518e444a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 263 additions and 28 deletions

View File

@ -1,10 +1,11 @@
[flake8]
count = True
# Several of the following could be autofixed or improved by running the code through psf/black
ignore = E123,E126,E128,E203,E231,E261,E302,E402,E722,F541,W191,W292,W293,W503,W504
ignore = E123,E126,E128,E203,E227,E231,E261,E302,E402,E722,F541,W191,W292,W293,W503,W504
max-complexity = 40
max-line-length = 236
show-source = True
statistics = True
builtins = _
per-file-ignores = __init__.py:F401,F403,F405 simple_menu.py:C901,W503 guided.py:C901 network_configuration.py:F821
exclude = .git,__pycache__,docs,actions-runner

View File

@ -17,7 +17,7 @@ jobs:
pacman-key --init
pacman --noconfirm -Sy archlinux-keyring
pacman --noconfirm -Syyu
pacman --noconfirm -Sy python-pip python-pyparted python-simple-term-menu pkgconfig gcc
pacman --noconfirm -Sy python-pip python-pydantic python-pyparted python-simple-term-menu pkgconfig gcc
- name: Install build dependencies
run: |
python -m pip install --break-system-packages --upgrade pip

3
.gitignore vendored
View File

@ -36,3 +36,6 @@ venv
/*.sig
/*.json
requirements.txt
/.gitconfig
/actions-runner
/cmd_output.txt

View File

@ -22,6 +22,7 @@ depends=(
'pciutils'
'procps-ng'
'python'
'python-pydantic'
'python-pyparted'
'python-simple-term-menu'
'systemd'

View File

@ -38,3 +38,9 @@ class PackageError(Exception):
class Deprecated(Exception):
	# NOTE(review): no raise site is visible in this view; presumably signals
	# use of deprecated functionality - confirm against callers.
	pass
class DownloadTimeout(Exception):
	"""Download timeout exception raised by DownloadTimer."""

View File

@ -493,7 +493,7 @@ class Installer:
if mirrorlist_config:
debug(f'Mirrorlist: {mirrorlist_config}')
with local_mirrorlist_conf.open('a') as fp:
with local_mirrorlist_conf.open('w') as fp:
fp.write(mirrorlist_config)
def genfstab(self, flags: str = '-pU'):

View File

@ -1,3 +1,4 @@
import json
import pathlib
from dataclasses import dataclass, field
from enum import Enum
@ -7,6 +8,7 @@ from .menu import AbstractSubMenu, Selector, MenuSelectionType, Menu, ListManage
from .networking import fetch_data_from_url
from .output import warn, FormattedOutput
from .storage import storage
from .models.mirrors import MirrorStatusListV3, MirrorStatusEntryV3
if TYPE_CHECKING:
_: Any
@ -270,7 +272,11 @@ def select_mirror_regions(preset_values: Dict[str, List[str]] = {}) -> Dict[str,
case MenuSelectionType.Skip:
return preset_values
case MenuSelectionType.Selection:
return {selected: mirrors[selected] for selected in choice.multi_value}
return {
selected: [
f"{mirror.url}$repo/os/$arch" for mirror in sort_mirrors_by_performance(mirrors[selected])
] for selected in choice.multi_value
}
return {}
@ -280,39 +286,55 @@ def select_custom_mirror(prompt: str = '', preset: List[CustomMirror] = []):
return custom_mirrors
def _parse_mirror_list(mirrorlist: str) -> Dict[str, List[str]]:
file_content = mirrorlist.split('\n')
file_content = list(filter(lambda x: x, file_content)) # filter out empty lines
first_srv_idx = [idx for idx, line in enumerate(file_content) if 'server' in line.lower()][0]
mirrors = file_content[first_srv_idx - 1:]
mirror_list: Dict[str, List[str]] = {}
for idx in range(0, len(mirrors), 2):
region = mirrors[idx].removeprefix('## ')
url = mirrors[idx + 1].removeprefix('#').removeprefix('Server = ')
mirror_list.setdefault(region, []).append(url)
return mirror_list
def sort_mirrors_by_performance(mirror_list :List['MirrorStatusEntryV3']) -> List['MirrorStatusEntryV3']:
	"""
	Return a new list of mirrors ordered best-first.

	Mirrors are ordered by `score` ascending and, for equal scores, by
	`speed` descending so that the faster mirror wins the tie. Entries
	without a score are placed last, and a missing speed counts as 0.

	Reading `mirror.speed` is done once per mirror while building the sort
	keys.
	"""
	def _rank(mirror):
		score = float('inf') if mirror.score is None else mirror.score
		# Negate the speed: sorted() is ascending, but a higher speed is better
		return score, -(mirror.speed or 0)

	return sorted(mirror_list, key=_rank)
def list_mirrors() -> Dict[str, List[str]]:
regions: Dict[str, List[str]] = {}
def _parse_mirror_list(mirrorlist: str) -> Dict[str, List[MirrorStatusEntryV3]]:
	"""
	Group the usable mirrors of a mirror-status JSON document by country.

	The JSON text is loaded into a MirrorStatusListV3 and every entry of its
	`urls` list is inspected. Entries that are inactive, have no `last_sync`
	or have no score / a score of 100 or more are dropped, as are entries
	whose URL does not start with 'http'. The returned dict is keyed by
	country name, with keys inserted in sorted order.
	"""
	status_list = MirrorStatusListV3(**json.loads(mirrorlist))
	by_country: Dict[str, List[MirrorStatusEntryV3]] = {}

	for entry in status_list.urls:
		# Disabled by mirror-list admins
		if entry.active is False:
			continue

		# Has not synced recently
		if entry.last_sync is None:
			continue

		# entry.score (error rate) over time reported from backend: https://github.com/archlinux/archweb/blob/31333d3516c91db9a2f2d12260bd61656c011fd1/mirrors/utils.py#L111C22-L111C66
		if entry.score is None or entry.score >= 100:
			continue

		if entry.country == "":
			# TODO: This should be removed once RFC!29 is merged and completed
			# Until then, there are mirrors which lack data in the backend
			# and there is no way of knowing where they're located.
			# So we have to assume world-wide
			entry.country = "Worldwide"

		if not entry.url.startswith('http'):
			continue

		by_country.setdefault(entry.country, []).append(entry)

	# Rebuild the mapping so that iteration yields the regions alphabetically
	return {country: by_country[country] for country in sorted(by_country)}
def list_mirrors() -> Dict[str, List[MirrorStatusEntryV3]]:
	"""
	Return the available mirrors grouped by region.

	In offline mode the content of /etc/pacman.d/mirrorlist is used,
	otherwise the mirror status is fetched from archlinux.org. The text is
	then handed to _parse_mirror_list().

	An empty dict is returned (after a warning) when the data can neither be
	fetched nor parsed.

	NOTE(review): _parse_mirror_list() expects the JSON status document, so
	the plain-text pacman mirrorlist read in offline mode cannot be parsed by
	it. That case now ends in a warning and an empty result instead of an
	uncaught exception; a dedicated parser for the local file is still needed
	for offline mode to list any regions.
	"""
	regions: Dict[str, List[MirrorStatusEntryV3]] = {}

	if storage['arguments']['offline']:
		with pathlib.Path('/etc/pacman.d/mirrorlist').open('r') as fp:
			mirrorlist = fp.read()
	else:
		url = "https://archlinux.org/mirrors/status/json/"
		try:
			mirrorlist = fetch_data_from_url(url)
		except ValueError as err:
			warn(f'Could not fetch an active mirror-list: {err}')
			return regions

	try:
		return _parse_mirror_list(mirrorlist)
	except ValueError as err:
		# json.JSONDecodeError and pydantic's ValidationError both derive from ValueError
		warn(f'Could not parse the mirror-list: {err}')
		return regions

View File

@ -0,0 +1,91 @@
import datetime
import pydantic
import urllib.parse
import urllib.request
from typing import (
Dict,
List
)
from ..networking import ping, DownloadTimer
from ..output import info, debug
class MirrorStatusEntryV3(pydantic.BaseModel):
	"""
	A single mirror URL entry, as held in the `urls` list of MirrorStatusListV3.

	Besides the validated fields, the model lazily measures and caches the
	download speed and the ICMP latency of the mirror (see `speed` and
	`latency`). Hostname and port are derived from `url` after validation.
	"""
	url :str
	protocol :str
	active :bool
	country :str
	country_code :str
	isos :bool
	ipv4 :bool
	ipv6 :bool
	details :str
	delay :int|None = None
	last_sync :datetime.datetime|None = None
	duration_avg :float|None = None
	duration_stddev :float|None = None
	completion_pct :float|None = None
	score :int|None = None
	# Cached measurements / derived values, filled in on demand
	_latency :float|None = None
	_speed :float|None = None
	_hostname :str|None = None
	_port :int|None = None

	@property
	def speed(self) -> float|None:
		"""
		Download speed in bytes per second, measured once by fetching core.db.

		The result is cached. If the download fails or times out the speed
		is recorded as 0.0 instead of raising, so that one unreachable
		mirror cannot abort the handling of the whole mirror list.
		"""
		if self._speed is None:
			# Function-scope imports: neither name is imported at module level here
			import http.client
			from ..exceptions import DownloadTimeout

			info(f"Checking download speed of {self._hostname}[{self.score}] by fetching: {self.url}core/os/x86_64/core.db")
			req = urllib.request.Request(url=f"{self.url}core/os/x86_64/core.db")

			try:
				with urllib.request.urlopen(req, None, 5) as handle, DownloadTimer(timeout=5) as timer:
					size = len(handle.read())
			except (OSError, http.client.HTTPException, DownloadTimeout) as err:
				# URLError, socket timeouts and connection resets are all OSError subclasses
				debug(f" speed: <undetermined> ({err})")
				self._speed = 0.0
			else:
				# timer.time is only set once the DownloadTimer context has exited
				self._speed = size / timer.time if timer.time else 0.0
				debug(f" speed: {self._speed} ({int(self._speed / 1024 / 1024 * 100) / 100}MiB/s)")

		return self._speed

	@property
	def latency(self) -> float|None:
		"""
		Latency measures the milliseconds between one ICMP request & response.
		It only does so once because we check if self._latency is None, and an ICMP timeout results in -1.
		We do this because some hosts block ICMP so we'll have to rely on .speed instead which is slower.
		"""
		if self._latency is None:
			info(f"Checking latency for {self.url}")
			self._latency = ping(self._hostname, timeout=2)
			debug(f" latency: {self._latency}")

		return self._latency

	@pydantic.field_validator('score', mode='before')
	@classmethod
	def validate_score(cls, value) -> int|None:
		"""Round the reported score to an integer; None is passed through unchanged."""
		if value is not None:
			value = round(value)
			debug(f" score: {value}")

		return value

	@pydantic.model_validator(mode='after')
	def debug_output(self, validation_info) -> 'MirrorStatusEntryV3':
		"""Derive hostname and port from the URL and log that the mirror was loaded."""
		self._hostname, *_port = urllib.parse.urlparse(self.url).netloc.split(':', 1)
		self._port = int(_port[0]) if _port else None

		# Compare against None so that a score of 0 is still reported
		debug(f"Loaded mirror {self._hostname}" + (f" with current score of {self.score}" if self.score is not None else ''))
		return self
class MirrorStatusListV3(pydantic.BaseModel):
	"""
	Top-level mirror status document; only data declaring `version` 3 is accepted.
	"""
	cutoff :int
	last_check :datetime.datetime
	num_checks :int
	urls :List[MirrorStatusEntryV3]
	version :int

	@pydantic.model_validator(mode='before')
	@classmethod
	def check_model(cls, data: Dict[str, int|datetime.datetime|List[MirrorStatusEntryV3]]) -> Dict[str, int|datetime.datetime|List[MirrorStatusEntryV3]]:
		"""Reject any input whose 'version' key is not 3, otherwise hand the data on untouched."""
		if data.get('version') != 3:
			raise ValueError("MirrorStatusListV3 only accepts version 3 data from https://archlinux.org/mirrors/status/json/")

		return data

View File

@ -2,15 +2,68 @@ import os
import socket
import ssl
import struct
import time
import select
import signal
import random
from typing import Union, Dict, Any, List, Optional
from urllib.error import URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from .exceptions import SysCallError
from .exceptions import SysCallError, DownloadTimeout
from .output import error, info
from .pacman import Pacman
class DownloadTimer():
	'''
	Context manager for timing downloads with timeouts.

	The elapsed wall-clock time of the `with` body is available as `.time`
	(in seconds) once the context has exited. When `timeout` is greater than
	zero a SIGALRM based alarm is armed for the duration of the context; the
	previously installed handler and any previously pending alarm are put
	back on exit.
	'''
	def __init__(self, timeout=5):
		'''
		Args:
			timeout:
				The download timeout in seconds. The DownloadTimeout exception
				will be raised in the context after this many seconds.
				A value of 0 or less only measures time, without any alarm.
		'''
		self.time = None
		self.start_time = None
		self.timeout = timeout
		self.previous_handler = None
		self.previous_timer = None

	def raise_timeout(self, signl, frame):
		'''
		Raise the DownloadTimeout exception.
		'''
		raise DownloadTimeout(f'Download timed out after {self.timeout} second(s).')

	def __enter__(self):
		if self.timeout > 0:
			self.previous_handler = signal.signal(signal.SIGALRM, self.raise_timeout)
			self.previous_timer = signal.alarm(self.timeout)

		self.start_time = time.time()
		return self

	def __exit__(self, typ, value, traceback):
		if self.start_time is not None:
			time_delta = time.time() - self.start_time
			self.time = time_delta

			# Only touch the alarm if this context armed it; otherwise an
			# alarm owned by someone else would be cancelled here.
			if self.timeout > 0:
				signal.alarm(0)

				# signal.signal() returns None when the previous handler was
				# not installed from Python; None can not be passed back in.
				handler = self.previous_handler
				if handler is None:
					handler = signal.SIG_DFL
				signal.signal(signal.SIGALRM, handler)

				previous_timer = self.previous_timer
				if previous_timer and previous_timer > 0:
					remaining_time = int(previous_timer - time_delta)
					# The alarm should have been raised during the download.
					if remaining_time <= 0:
						signal.raise_signal(signal.SIGALRM)
					else:
						signal.alarm(remaining_time)

			self.start_time = None
def get_hw_addr(ifname :str) -> str:
import fcntl
@ -81,3 +134,60 @@ def fetch_data_from_url(url: str, params: Optional[Dict] = None) -> str:
return data
except URLError:
raise ValueError(f'Unable to fetch data from url: {url}')
def calc_checksum(icmp_packet):
	"""
	Calculate the 16-bit one's-complement checksum of a packet.

	The bytes are summed as big-endian 16-bit words (a trailing odd byte is
	padded with a zero low byte), carries are folded back into the low 16
	bits, and the complement of the result is returned.

	Args:
		icmp_packet: The packet as a bytes-like object, with its checksum
			field zeroed when computing a checksum to send.

	Returns:
		The checksum as an int in the range 0..0xFFFF.
	"""
	length = len(icmp_packet)
	checksum = 0

	for i in range(0, length, 2):
		low_byte = icmp_packet[i + 1] if i + 1 < length else 0
		checksum += (icmp_packet[i] << 8) + low_byte

	# Folding once may itself produce a carry, so repeat until none is left
	while checksum >> 16:
		checksum = (checksum >> 16) + (checksum & 0xFFFF)

	return ~checksum & 0xFFFF
def build_icmp(payload):
	"""
	Build an ICMP Echo Request packet carrying `payload`.

	The packet is first assembled with a zero checksum, the checksum is
	computed over that, and the packet is then assembled again with the
	computed checksum in place.
	"""
	def _assemble(checksum):
		# Header layout '!BBHHH': 8, 0, <checksum>, 0, 1 followed by the payload
		return struct.pack('!BBHHH', 8, 0, checksum, 0, 1) + payload

	return _assemble(calc_checksum(_assemble(0)))
def ping(hostname, timeout=5):
	"""
	Send one ICMP Echo Request to `hostname` and measure the round trip.

	Args:
		hostname: Host name or address passed to the socket's sendto().
		timeout: Seconds to wait for a matching Echo Reply.

	Returns:
		The latency in whole milliseconds, or -1 when no matching reply
		arrived within `timeout` or a socket error occurred while receiving.

	Errors raised while creating the socket or sending the request are not
	caught here. The socket and the epoll object are always closed.
	"""
	started = time.time()
	random_identifier = f'archinstall-{random.randint(1000, 9999)}'.encode()
	latency = -1

	# Create a raw socket (requires root, which should be fine on archiso).
	# Both objects are context managers, so they get closed on every exit path.
	with socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP) as icmp_socket, select.epoll() as watchdog:
		watchdog.register(icmp_socket, select.EPOLLIN | select.EPOLLHUP)

		# Send the ICMP packet
		icmp_socket.sendto(build_icmp(random_identifier), (hostname, 0))

		# Gracefully wait for X amount of time
		# for a ICMP response or exit with no latency
		while latency == -1 and time.time() - started < timeout:
			try:
				for _fileno, _event in watchdog.poll(0.1):
					response, _addr = icmp_socket.recvfrom(1024)
					# The ICMP type is read at offset 20 of the received data
					icmp_type = struct.unpack('!B', response[20:21])[0]

					# Check if it's an Echo Reply (ICMP type 0)
					if icmp_type == 0 and response[-len(random_identifier):] == random_identifier:
						latency = round((time.time() - started) * 1000)
						break
			except socket.error as err:
				print(f"Error: {err}")
				break

	return latency

View File

@ -20,6 +20,7 @@ classifiers = [
dependencies = [
"simple-term-menu==1.6.4",
"pyparted @ https://github.com//dcantrell/pyparted/archive/v3.13.0.tar.gz#sha512=26819e28d73420937874f52fda03eb50ab1b136574ea9867a69d46ae4976d38c4f26a2697fa70597eed90dd78a5ea209bafcc3227a17a7a5d63cff6d107c2b11",
"pydantic==2.8.2"
]
[project.urls]