Use `/mirrors/status/json/` instead of `/mirrorlist/` to get mirror lists (#2599)
* Using JSON endpoint instead of ASCII endpoint for mirror listing, as the JSON endpoint is cached and easier to parse
* Added a TODO to handle unknown regional mirrors (which lack info in the backend)
* Filtered out 'bad' mirrors. Also added a sorting mechanism that uses the mirrors' 'score' rather than just the URL name. This emulates the reflector.service/rankmirrors behavior and thus reduces the need to re-rank the mirrors.
* Added the ability to sort mirrors via latency or download speed using sorted(mirror_list, key=lambda mirror: (mirror.score, mirror.speed)) - but I have not implemented the sorting via the menu yet, and I have not integrated the new MirrorStatus model into the handling of URLs. I still need to figure out where the {region: [url, url]} is being used, so that I can convert to {region: [mirror.url, mirror.url]} logic.
* Converting MirrorStatus model to {mirror: [url, url]}
* Added debug information for /var/log/archinstall/install.log
* Fixing flake8
* Fixed issue where 'dead' mirrors have no score, and thus can't be round():ed
* Forgot to return model validation data after validation
* Improving debug/info output
* Reverting change in #2350 - Writing over instead of appending to mirrorlist
* Mirror URLs reported by the JSON endpoint do not contain the repo format, only the base location for the mirror. So we have to adjust for this.
* pydantic did not honor 'private' variables in 'before' model validator, had to change to 'after' instead.
* Sorted out mypy typing matching the new MirrorStatus model
* Added pydantic as a dependency, it's time!
* Updated workflow to include pydantic
* Added return values from model @property decorators.
This commit is contained in:
parent
4af3b02707
commit
98518e444a
3
.flake8
3
.flake8
|
|
@ -1,10 +1,11 @@
|
|||
[flake8]
|
||||
count = True
|
||||
# Several of the following could be autofixed or improved by running the code through psf/black
|
||||
ignore = E123,E126,E128,E203,E231,E261,E302,E402,E722,F541,W191,W292,W293,W503,W504
|
||||
ignore = E123,E126,E128,E203,E227,E231,E261,E302,E402,E722,F541,W191,W292,W293,W503,W504
|
||||
max-complexity = 40
|
||||
max-line-length = 236
|
||||
show-source = True
|
||||
statistics = True
|
||||
builtins = _
|
||||
per-file-ignores = __init__.py:F401,F403,F405 simple_menu.py:C901,W503 guided.py:C901 network_configuration.py:F821
|
||||
exclude = .git,__pycache__,docs,actions-runner
|
||||
|
|
@ -17,7 +17,7 @@ jobs:
|
|||
pacman-key --init
|
||||
pacman --noconfirm -Sy archlinux-keyring
|
||||
pacman --noconfirm -Syyu
|
||||
pacman --noconfirm -Sy python-pip python-pyparted python-simple-term-menu pkgconfig gcc
|
||||
pacman --noconfirm -Sy python-pip python-pydantic python-pyparted python-simple-term-menu pkgconfig gcc
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
python -m pip install --break-system-packages --upgrade pip
|
||||
|
|
|
|||
|
|
@ -36,3 +36,6 @@ venv
|
|||
/*.sig
|
||||
/*.json
|
||||
requirements.txt
|
||||
/.gitconfig
|
||||
/actions-runner
|
||||
/cmd_output.txt
|
||||
1
PKGBUILD
1
PKGBUILD
|
|
@ -22,6 +22,7 @@ depends=(
|
|||
'pciutils'
|
||||
'procps-ng'
|
||||
'python'
|
||||
'python-pydantic'
|
||||
'python-pyparted'
|
||||
'python-simple-term-menu'
|
||||
'systemd'
|
||||
|
|
|
|||
|
|
@ -38,3 +38,9 @@ class PackageError(Exception):
|
|||
|
||||
class Deprecated(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DownloadTimeout(Exception):
    '''
    Download timeout exception raised by DownloadTimer.

    DownloadTimer arms a SIGALRM alarm when its context is entered; the
    signal handler it installs raises this exception once the configured
    number of seconds has passed.
    '''
|
||||
|
|
@ -493,7 +493,7 @@ class Installer:
|
|||
if mirrorlist_config:
|
||||
debug(f'Mirrorlist: {mirrorlist_config}')
|
||||
|
||||
with local_mirrorlist_conf.open('a') as fp:
|
||||
with local_mirrorlist_conf.open('w') as fp:
|
||||
fp.write(mirrorlist_config)
|
||||
|
||||
def genfstab(self, flags: str = '-pU'):
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import json
|
||||
import pathlib
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
|
|
@ -7,6 +8,7 @@ from .menu import AbstractSubMenu, Selector, MenuSelectionType, Menu, ListManage
|
|||
from .networking import fetch_data_from_url
|
||||
from .output import warn, FormattedOutput
|
||||
from .storage import storage
|
||||
from .models.mirrors import MirrorStatusListV3, MirrorStatusEntryV3
|
||||
|
||||
if TYPE_CHECKING:
|
||||
_: Any
|
||||
|
|
@ -270,7 +272,11 @@ def select_mirror_regions(preset_values: Dict[str, List[str]] = {}) -> Dict[str,
|
|||
case MenuSelectionType.Skip:
|
||||
return preset_values
|
||||
case MenuSelectionType.Selection:
|
||||
return {selected: mirrors[selected] for selected in choice.multi_value}
|
||||
return {
|
||||
selected: [
|
||||
f"{mirror.url}$repo/os/$arch" for mirror in sort_mirrors_by_performance(mirrors[selected])
|
||||
] for selected in choice.multi_value
|
||||
}
|
||||
|
||||
return {}
|
||||
|
||||
|
|
@ -280,39 +286,55 @@ def select_custom_mirror(prompt: str = '', preset: List[CustomMirror] = []):
|
|||
return custom_mirrors
|
||||
|
||||
|
||||
def _parse_mirror_list(mirrorlist: str) -> Dict[str, List[str]]:
|
||||
file_content = mirrorlist.split('\n')
|
||||
file_content = list(filter(lambda x: x, file_content)) # filter out empty lines
|
||||
first_srv_idx = [idx for idx, line in enumerate(file_content) if 'server' in line.lower()][0]
|
||||
mirrors = file_content[first_srv_idx - 1:]
|
||||
|
||||
mirror_list: Dict[str, List[str]] = {}
|
||||
|
||||
for idx in range(0, len(mirrors), 2):
|
||||
region = mirrors[idx].removeprefix('## ')
|
||||
url = mirrors[idx + 1].removeprefix('#').removeprefix('Server = ')
|
||||
mirror_list.setdefault(region, []).append(url)
|
||||
|
||||
return mirror_list
|
||||
def sort_mirrors_by_performance(mirror_list :List[MirrorStatusEntryV3]) -> List[MirrorStatusEntryV3]:
    """
    Return a new list with the given mirrors ordered best-first.

    Mirrors are ranked by ascending ``score`` (lower is better); mirrors
    sharing a score are ranked by descending ``speed`` so the fastest one
    comes first. A mirror without a score is ranked last and a mirror
    without a measured speed counts as the slowest, so mixed ``None``
    values never end up being compared against numbers.

    Note that reading ``mirror.speed`` happens once for every mirror in the
    list while the sort keys are being built.
    """
    def _rank(mirror) -> tuple:
        score = float('inf') if mirror.score is None else mirror.score
        speed = mirror.speed or 0
        # Negate the speed: sorted() is ascending, but a higher speed is better.
        return (score, -speed)

    return sorted(mirror_list, key=_rank)
|
||||
|
||||
|
||||
def list_mirrors() -> Dict[str, List[str]]:
|
||||
regions: Dict[str, List[str]] = {}
|
||||
def _parse_mirror_list(mirrorlist: str) -> Dict[str, List[MirrorStatusEntryV3]]:
    """
    Group the usable mirrors of a mirror status JSON document by country.

    Args:
        mirrorlist: Raw JSON text in the format served by
            https://archlinux.org/mirrors/status/json/.

    Returns:
        ``{country: [mirror, ...]}`` with the countries in alphabetical
        order; within a country the mirrors keep the order of the document.
        Only active, synced mirrors with a score below 100 and an http(s)
        URL are included. An empty mapping is returned (after a warning)
        when the text is not a valid mirror status document.
    """
    try:
        mirror_status = MirrorStatusListV3(**json.loads(mirrorlist))
    except (ValueError, TypeError) as err:
        # json.JSONDecodeError is a ValueError; a JSON document that is not an
        # object makes the ** expansion raise TypeError. Plain-text input such
        # as /etc/pacman.d/mirrorlist ends up here instead of crashing.
        warn(f'Could not parse the mirror status list: {err}')
        return {}

    by_country: Dict[str, List[MirrorStatusEntryV3]] = {}

    for mirror in mirror_status.urls:
        # We filter out mirrors that have bad criteria values
        if mirror.active is False:  # Disabled by mirror-list admins
            continue
        if mirror.last_sync is None:  # Has not synced recently
            continue
        # mirror.score (error rate) over time reported from backend: https://github.com/archlinux/archweb/blob/31333d3516c91db9a2f2d12260bd61656c011fd1/mirrors/utils.py#L111C22-L111C66
        if mirror.score is None or mirror.score >= 100:
            continue

        if mirror.country == "":
            # TODO: This should be removed once RFC!29 is merged and completed
            # Until then, there are mirrors which lack data in the backend
            # and there is no way of knowing where they're located.
            # So we have to assume world-wide
            mirror.country = "Worldwide"

        # Only http:// and https:// mirrors are kept
        if mirror.url.startswith('http'):
            by_country.setdefault(mirror.country, []).append(mirror)

    return dict(sorted(by_country.items(), key=lambda item: item[0]))
|
||||
|
||||
|
||||
def list_mirrors() -> Dict[str, List[MirrorStatusEntryV3]]:
|
||||
regions: Dict[str, List[MirrorStatusEntryV3]] = {}
|
||||
|
||||
if storage['arguments']['offline']:
|
||||
with pathlib.Path('/etc/pacman.d/mirrorlist').open('r') as fp:
|
||||
mirrorlist = fp.read()
|
||||
else:
|
||||
url = "https://archlinux.org/mirrorlist/?protocol=https&protocol=http&ip_version=4&ip_version=6&use_mirror_status=on"
|
||||
url = "https://archlinux.org/mirrors/status/json/"
|
||||
try:
|
||||
mirrorlist = fetch_data_from_url(url)
|
||||
except ValueError as err:
|
||||
warn(f'Could not fetch an active mirror-list: {err}')
|
||||
return regions
|
||||
|
||||
regions = _parse_mirror_list(mirrorlist)
|
||||
sorted_regions = {}
|
||||
for region, urls in regions.items():
|
||||
sorted_regions[region] = sorted(urls, reverse=True)
|
||||
|
||||
return sorted_regions
|
||||
return _parse_mirror_list(mirrorlist)
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
import datetime
|
||||
import pydantic
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from typing import (
|
||||
Dict,
|
||||
List
|
||||
)
|
||||
|
||||
from ..networking import ping, DownloadTimer
|
||||
from ..output import info, debug
|
||||
|
||||
|
||||
class MirrorStatusEntryV3(pydantic.BaseModel):
    """
    A single mirror entry, as listed in ``MirrorStatusListV3.urls``.

    Besides the validated fields the model lazily measures and caches the
    download speed (``speed``) and the ICMP latency (``latency``) of the
    mirror the first time the respective property is read.
    """
    url :str
    protocol :str
    active :bool
    country :str
    country_code :str
    isos :bool
    ipv4 :bool
    ipv6 :bool
    details :str
    delay :int|None = None
    last_sync :datetime.datetime|None = None
    duration_avg :float|None = None
    duration_stddev :float|None = None
    completion_pct :float|None = None
    score :int|None = None
    # Private, non-validated state: measurement caches and values derived from `url`
    _latency :float|None = None
    _speed :float|None = None
    _hostname :str|None = None
    _port :int|None = None

    @property
    def speed(self) -> float|None:
        """
        Download speed of the mirror in bytes per second.

        Measured once by downloading ``core/os/x86_64/core.db`` from the
        mirror (5 second timeouts) and cached afterwards. When the download
        fails or times out the speed is recorded as ``0.0`` so that a broken
        mirror ranks as the slowest one instead of aborting the caller.
        """
        if self._speed is None:
            probe_url = f"{self.url}core/os/x86_64/core.db"
            info(f"Checking download speed of {self._hostname}[{self.score}] by fetching: {probe_url}")
            req = urllib.request.Request(url=probe_url)

            try:
                with urllib.request.urlopen(req, None, 5) as handle, DownloadTimer(timeout=5) as timer:
                    size = len(handle.read())

                self._speed = size / timer.time
            except Exception as err:
                # Deliberately broad: this is a best-effort probe and every
                # failure (URL/socket errors, DownloadTimeout, a zero elapsed
                # time) means the same thing - the speed is unknown.
                debug(f" speed: <undetermined> ({err})")
                self._speed = 0.0
            else:
                debug(f" speed: {self._speed} ({int(self._speed / 1024 / 1024 * 100) / 100}MiB/s)")

        return self._speed

    @property
    def latency(self) -> float|None:
        """
        Latency measures the milliseconds between one ICMP request & response.
        It only does so once because we check if self._latency is None, and an ICMP timeout results in -1
        We do this because some hosts block ICMP so we'll have to rely on .speed() instead which is slower.
        """
        if self._latency is None:
            info(f"Checking latency for {self.url}")
            self._latency = ping(self._hostname, timeout=2)
            debug(f" latency: {self._latency}")

        return self._latency

    @pydantic.field_validator('score', mode='before')
    @classmethod
    def validate_score(cls, value) -> int|None:
        """
        Round the raw score to an integer before field validation.

        ``None`` is passed through untouched.
        """
        if value is not None:
            value = round(value)
            debug(f" score: {value}")

        return value

    @pydantic.model_validator(mode='after')
    def debug_output(self, validation_info) -> 'MirrorStatusEntryV3':
        """
        Derive ``_hostname`` and ``_port`` from ``url`` and log the loaded mirror.

        Runs after field validation so that the private attributes can be set.
        """
        # urlparse's own accessors cope with IPv6 literals ("[::1]:8080") and
        # userinfo, which a plain split on ':' does not.
        parsed = urllib.parse.urlparse(self.url)
        self._hostname = parsed.hostname
        self._port = parsed.port

        # Compare against None so that a score of 0 is still reported
        debug(f"Loaded mirror {self._hostname}" + (f" with current score of {self.score}" if self.score is not None else ''))
        return self
|
||||
|
||||
class MirrorStatusListV3(pydantic.BaseModel):
    """
    Top-level model of a version 3 mirror status document.

    ``urls`` holds one ``MirrorStatusEntryV3`` per listed mirror.
    """
    cutoff :int
    last_check :datetime.datetime
    num_checks :int
    urls :List[MirrorStatusEntryV3]
    version :int

    @pydantic.model_validator(mode='before')
    @classmethod
    def check_model(cls, data: Dict[str, int|datetime.datetime|List[MirrorStatusEntryV3]]) -> Dict[str, int|datetime.datetime|List[MirrorStatusEntryV3]]:
        """
        Reject any input whose ``version`` is not 3 before the fields are validated.

        Returns the input unchanged when the version matches, raises
        ValueError otherwise.
        """
        if data.get('version') != 3:
            raise ValueError("MirrorStatusListV3 only accepts version 3 data from https://archlinux.org/mirrors/status/json/")

        return data
|
||||
|
|
@ -2,15 +2,68 @@ import os
|
|||
import socket
|
||||
import ssl
|
||||
import struct
|
||||
import time
|
||||
import select
|
||||
import signal
|
||||
import random
|
||||
from typing import Union, Dict, Any, List, Optional
|
||||
from urllib.error import URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
|
||||
from .exceptions import SysCallError
|
||||
from .exceptions import SysCallError, DownloadTimeout
|
||||
from .output import error, info
|
||||
from .pacman import Pacman
|
||||
|
||||
class DownloadTimer():
    '''
    Context manager for timing downloads with timeouts.

    On entry a SIGALRM alarm is armed (when ``timeout`` is greater than 0);
    if it fires inside the context, DownloadTimeout is raised there. On exit
    the elapsed time in seconds is stored in ``time`` and the previously
    installed SIGALRM handler and any previously pending alarm are restored.
    '''
    def __init__(self, timeout=5):
        '''
        Args:
            timeout:
                The download timeout in seconds. The DownloadTimeout exception
                will be raised in the context after this many seconds.
                A value of 0 or less disables the timeout; the context is
                then only timed.
        '''
        self.time = None
        self.start_time = None
        self.timeout = timeout
        self.previous_handler = None
        self.previous_timer = None

    def raise_timeout(self, signl, frame):
        '''
        Raise the DownloadTimeout exception.
        '''
        raise DownloadTimeout(f'Download timed out after {self.timeout} second(s).')

    def __enter__(self):
        if self.timeout > 0:
            self.previous_handler = signal.signal(signal.SIGALRM, self.raise_timeout)
            # signal.alarm() returns the seconds left on a previously armed alarm
            self.previous_timer = signal.alarm(self.timeout)

        # Monotonic clock: the measured duration must not be affected by
        # wall-clock adjustments (e.g. a time sync) during the download.
        self.start_time = time.monotonic()
        return self

    def __exit__(self, typ, value, traceback):
        if self.start_time is not None:
            time_delta = time.monotonic() - self.start_time
            self.time = time_delta

            # Only touch the process-wide alarm when this timer armed it;
            # otherwise an alarm owned by outer code would be cancelled
            # without ever being restored.
            if self.timeout > 0:
                signal.alarm(0)
                signal.signal(signal.SIGALRM, self.previous_handler)

                previous_timer = self.previous_timer
                if previous_timer and previous_timer > 0:
                    remaining_time = int(previous_timer - time_delta)
                    # The alarm should have been raised during the download.
                    if remaining_time <= 0:
                        signal.raise_signal(signal.SIGALRM)
                    else:
                        signal.alarm(remaining_time)

        self.start_time = None
|
||||
|
||||
|
||||
def get_hw_addr(ifname :str) -> str:
|
||||
import fcntl
|
||||
|
|
@ -81,3 +134,60 @@ def fetch_data_from_url(url: str, params: Optional[Dict] = None) -> str:
|
|||
return data
|
||||
except URLError:
|
||||
raise ValueError(f'Unable to fetch data from url: {url}')
|
||||
|
||||
|
||||
def calc_checksum(icmp_packet):
    """
    Calculate the 16-bit Internet checksum (RFC 1071) of a packet.

    Args:
        icmp_packet: The packet bytes, with the checksum field set to zero.

    Returns:
        The one's complement of the one's complement sum of all big-endian
        16-bit words of the packet. A packet of odd length is padded with a
        trailing zero byte for the calculation.
    """
    if len(icmp_packet) % 2:
        icmp_packet = bytes(icmp_packet) + b'\x00'

    checksum = sum(struct.unpack(f'!{len(icmp_packet) // 2}H', icmp_packet))

    # Fold the carries back in until the sum fits into 16 bits. A single
    # fold is not enough: adding the carry can itself overflow 16 bits.
    while checksum >> 16:
        checksum = (checksum >> 16) + (checksum & 0xFFFF)

    return ~checksum & 0xFFFF
|
||||
|
||||
def build_icmp(payload):
    """
    Build an ICMP Echo Request packet carrying the given payload bytes.

    The header is packed twice: first with a zero checksum so that the
    checksum can be calculated over header + payload, then again with the
    calculated value filled in.
    """
    def _header(checksum):
        # type 8 (echo request), code 0, checksum, then the two 16-bit fields 0 and 1
        return struct.pack('!BBHHH', 8, 0, checksum, 0, 1)

    unsigned_packet = _header(0) + payload
    return _header(calc_checksum(unsigned_packet)) + payload
|
||||
|
||||
def ping(hostname, timeout=5):
    """
    Send a single ICMP echo request and wait for the matching reply.

    Args:
        hostname: Host name or IPv4 address to send the request to.
        timeout: Maximum number of seconds to wait for the reply.

    Returns:
        The round-trip time in milliseconds, or -1 when no matching echo
        reply arrived in time or a socket error occurred while waiting.

    Errors raised while creating the socket or sending the request (for
    example missing privileges or an unresolvable host name) propagate to
    the caller; the socket and the epoll object are closed in every case.
    """
    started = time.time()
    random_identifier = f'archinstall-{random.randint(1000, 9999)}'.encode()
    icmp_packet = build_icmp(random_identifier)
    latency = -1

    watchdog = select.epoll()
    try:
        # Create a raw socket (requires root, which should be fine on archiso)
        with socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP) as icmp_socket:
            watchdog.register(icmp_socket, select.EPOLLIN | select.EPOLLHUP)

            # Send the ICMP packet
            icmp_socket.sendto(icmp_packet, (hostname, 0))

            # Gracefully wait for X amount of time
            # for an ICMP response or exit with no latency
            while latency == -1 and time.time() - started < timeout:
                try:
                    for _fileno, _event in watchdog.poll(0.1):
                        response, _sender = icmp_socket.recvfrom(1024)

                        # The datagram starts with the IP header; its length is
                        # the low nibble of the first byte, in 32-bit words.
                        header_length = (response[0] & 0x0F) * 4 if response else 0
                        if len(response) <= header_length:
                            continue

                        icmp_type = response[header_length]

                        # Check if it's an Echo Reply (ICMP type 0) to our own request
                        if icmp_type == 0 and response[-len(random_identifier):] == random_identifier:
                            latency = round((time.time() - started) * 1000)
                            break
                except socket.error as err:
                    print(f"Error: {err}")
                    break
    finally:
        watchdog.close()

    return latency
|
||||
|
|
@ -20,6 +20,7 @@ classifiers = [
|
|||
dependencies = [
|
||||
"simple-term-menu==1.6.4",
|
||||
"pyparted @ https://github.com//dcantrell/pyparted/archive/v3.13.0.tar.gz#sha512=26819e28d73420937874f52fda03eb50ab1b136574ea9867a69d46ae4976d38c4f26a2697fa70597eed90dd78a5ea209bafcc3227a17a7a5d63cff6d107c2b11",
|
||||
"pydantic==2.8.2"
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
|
|
|||
Loading…
Reference in New Issue