Use `/mirrors/status/json/` instead of `/mirrorlist/` to get mirror lists (#2599)
* Using JSON endpoint instead of ASCII endpoint for mirror listing, as the JSON endpoint is cached and easier to parse
* Added a TODO to handle unknown regional mirrors (which lack info in the backend)
* Filtered out 'bad' mirrors. Also added a sorting mechanism that uses the mirror's 'score' rather than just the URL name. This emulates the reflector.service/rankmirrors behavior and thus reduces the need to re-rank the mirrors.
* Added the ability to sort mirrors via latency or download speed using sorted(mirror_list, key=lambda mirror: (mirror.score, mirror.speed)) - but I have not implemented the sorting via the menu yet, and I have not integrated the new MirrorStatus model into the handling of URLs. I still need to figure out where the {region: [url, url]} is being used, so that I can convert to {region: [mirror.url, mirror.url]} logic.
* Converting MirrorStatus model to {mirror: [url, url]}
* Added debug information for /var/log/archinstall/install.log
* Fixing flake8
* Fixed issue where 'dead' mirrors have no score, and thus can't be rounded with round()
* Forgot to return model validation data after validation
* Improving debug/info output
* Reverting change in #2350 - Writing over instead of appending to mirrorlist
* Mirror URLs reported by the JSON endpoint do not contain the repo format, only the base location for the mirror. So we have to adjust for this.
* pydantic did not honor 'private' variables in 'before' model validator, had to change to 'after' instead.
* Sorted out mypy typing matching the new MirrorStatus model
* Added pydantic as a dependency, it's time!
* Updated workflow to include pydantic
* Added return values from model @property decorators.
This commit is contained in:
parent
4af3b02707
commit
98518e444a
3
.flake8
3
.flake8
|
|
@ -1,10 +1,11 @@
|
||||||
[flake8]
|
[flake8]
|
||||||
count = True
|
count = True
|
||||||
# Several of the following could be autofixed or improved by running the code through psf/black
|
# Several of the following could be autofixed or improved by running the code through psf/black
|
||||||
ignore = E123,E126,E128,E203,E231,E261,E302,E402,E722,F541,W191,W292,W293,W503,W504
|
ignore = E123,E126,E128,E203,E227,E231,E261,E302,E402,E722,F541,W191,W292,W293,W503,W504
|
||||||
max-complexity = 40
|
max-complexity = 40
|
||||||
max-line-length = 236
|
max-line-length = 236
|
||||||
show-source = True
|
show-source = True
|
||||||
statistics = True
|
statistics = True
|
||||||
builtins = _
|
builtins = _
|
||||||
per-file-ignores = __init__.py:F401,F403,F405 simple_menu.py:C901,W503 guided.py:C901 network_configuration.py:F821
|
per-file-ignores = __init__.py:F401,F403,F405 simple_menu.py:C901,W503 guided.py:C901 network_configuration.py:F821
|
||||||
|
exclude = .git,__pycache__,docs,actions-runner
|
||||||
|
|
@ -17,7 +17,7 @@ jobs:
|
||||||
pacman-key --init
|
pacman-key --init
|
||||||
pacman --noconfirm -Sy archlinux-keyring
|
pacman --noconfirm -Sy archlinux-keyring
|
||||||
pacman --noconfirm -Syyu
|
pacman --noconfirm -Syyu
|
||||||
pacman --noconfirm -Sy python-pip python-pyparted python-simple-term-menu pkgconfig gcc
|
pacman --noconfirm -Sy python-pip python-pydantic python-pyparted python-simple-term-menu pkgconfig gcc
|
||||||
- name: Install build dependencies
|
- name: Install build dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --break-system-packages --upgrade pip
|
python -m pip install --break-system-packages --upgrade pip
|
||||||
|
|
|
||||||
|
|
@ -36,3 +36,6 @@ venv
|
||||||
/*.sig
|
/*.sig
|
||||||
/*.json
|
/*.json
|
||||||
requirements.txt
|
requirements.txt
|
||||||
|
/.gitconfig
|
||||||
|
/actions-runner
|
||||||
|
/cmd_output.txt
|
||||||
1
PKGBUILD
1
PKGBUILD
|
|
@ -22,6 +22,7 @@ depends=(
|
||||||
'pciutils'
|
'pciutils'
|
||||||
'procps-ng'
|
'procps-ng'
|
||||||
'python'
|
'python'
|
||||||
|
'python-pydantic'
|
||||||
'python-pyparted'
|
'python-pyparted'
|
||||||
'python-simple-term-menu'
|
'python-simple-term-menu'
|
||||||
'systemd'
|
'systemd'
|
||||||
|
|
|
||||||
|
|
@ -38,3 +38,9 @@ class PackageError(Exception):
|
||||||
|
|
||||||
class Deprecated(Exception):
|
class Deprecated(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class DownloadTimeout(Exception):
    """
    Download timeout exception raised by DownloadTimer.
    """
|
||||||
|
|
@ -493,7 +493,7 @@ class Installer:
|
||||||
if mirrorlist_config:
|
if mirrorlist_config:
|
||||||
debug(f'Mirrorlist: {mirrorlist_config}')
|
debug(f'Mirrorlist: {mirrorlist_config}')
|
||||||
|
|
||||||
with local_mirrorlist_conf.open('a') as fp:
|
with local_mirrorlist_conf.open('w') as fp:
|
||||||
fp.write(mirrorlist_config)
|
fp.write(mirrorlist_config)
|
||||||
|
|
||||||
def genfstab(self, flags: str = '-pU'):
|
def genfstab(self, flags: str = '-pU'):
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
@ -7,6 +8,7 @@ from .menu import AbstractSubMenu, Selector, MenuSelectionType, Menu, ListManage
|
||||||
from .networking import fetch_data_from_url
|
from .networking import fetch_data_from_url
|
||||||
from .output import warn, FormattedOutput
|
from .output import warn, FormattedOutput
|
||||||
from .storage import storage
|
from .storage import storage
|
||||||
|
from .models.mirrors import MirrorStatusListV3, MirrorStatusEntryV3
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
_: Any
|
_: Any
|
||||||
|
|
@ -270,7 +272,11 @@ def select_mirror_regions(preset_values: Dict[str, List[str]] = {}) -> Dict[str,
|
||||||
case MenuSelectionType.Skip:
|
case MenuSelectionType.Skip:
|
||||||
return preset_values
|
return preset_values
|
||||||
case MenuSelectionType.Selection:
|
case MenuSelectionType.Selection:
|
||||||
return {selected: mirrors[selected] for selected in choice.multi_value}
|
return {
|
||||||
|
selected: [
|
||||||
|
f"{mirror.url}$repo/os/$arch" for mirror in sort_mirrors_by_performance(mirrors[selected])
|
||||||
|
] for selected in choice.multi_value
|
||||||
|
}
|
||||||
|
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
@ -280,39 +286,55 @@ def select_custom_mirror(prompt: str = '', preset: List[CustomMirror] = []):
|
||||||
return custom_mirrors
|
return custom_mirrors
|
||||||
|
|
||||||
|
|
||||||
def _parse_mirror_list(mirrorlist: str) -> Dict[str, List[str]]:
|
def sort_mirrors_by_performance(mirror_list :List[MirrorStatusEntryV3]) -> List[MirrorStatusEntryV3]:
|
||||||
file_content = mirrorlist.split('\n')
|
return sorted(mirror_list, key=lambda mirror: (mirror.score, mirror.speed))
|
||||||
file_content = list(filter(lambda x: x, file_content)) # filter out empty lines
|
|
||||||
first_srv_idx = [idx for idx, line in enumerate(file_content) if 'server' in line.lower()][0]
|
|
||||||
mirrors = file_content[first_srv_idx - 1:]
|
|
||||||
|
|
||||||
mirror_list: Dict[str, List[str]] = {}
|
|
||||||
|
|
||||||
for idx in range(0, len(mirrors), 2):
|
|
||||||
region = mirrors[idx].removeprefix('## ')
|
|
||||||
url = mirrors[idx + 1].removeprefix('#').removeprefix('Server = ')
|
|
||||||
mirror_list.setdefault(region, []).append(url)
|
|
||||||
|
|
||||||
return mirror_list
|
|
||||||
|
|
||||||
|
|
||||||
def list_mirrors() -> Dict[str, List[str]]:
|
def _parse_mirror_list(mirrorlist: str) -> Dict[str, List[MirrorStatusEntryV3]]:
|
||||||
regions: Dict[str, List[str]] = {}
|
mirror_status = MirrorStatusListV3(**json.loads(mirrorlist))
|
||||||
|
|
||||||
|
sorting_placeholder: Dict[str, List[MirrorStatusEntryV3]] = {}
|
||||||
|
|
||||||
|
for mirror in mirror_status.urls:
|
||||||
|
# We filter out mirrors that have bad criteria values
|
||||||
|
if any([
|
||||||
|
mirror.active is False, # Disabled by mirror-list admins
|
||||||
|
mirror.last_sync is None, # Has not synced recently
|
||||||
|
# mirror.score (error rate) over time reported from backend: https://github.com/archlinux/archweb/blob/31333d3516c91db9a2f2d12260bd61656c011fd1/mirrors/utils.py#L111C22-L111C66
|
||||||
|
(mirror.score is None or mirror.score >= 100),
|
||||||
|
]):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if mirror.country == "":
|
||||||
|
# TODO: This should be removed once RFC!29 is merged and completed
|
||||||
|
# Until then, there are mirrors which lacks data in the backend
|
||||||
|
# and there is no way of knowing where they're located.
|
||||||
|
# So we have to assume world-wide
|
||||||
|
mirror.country = "Worldwide"
|
||||||
|
|
||||||
|
if mirror.url.startswith('http'):
|
||||||
|
sorting_placeholder.setdefault(mirror.country, []).append(mirror)
|
||||||
|
|
||||||
|
sorted_by_regions: Dict[str, List[MirrorStatusEntryV3]] = dict({
|
||||||
|
region: unsorted_mirrors
|
||||||
|
for region, unsorted_mirrors in sorted(sorting_placeholder.items(), key=lambda item: item[0])
|
||||||
|
})
|
||||||
|
|
||||||
|
return sorted_by_regions
|
||||||
|
|
||||||
|
|
||||||
|
def list_mirrors() -> Dict[str, List[MirrorStatusEntryV3]]:
|
||||||
|
regions: Dict[str, List[MirrorStatusEntryV3]] = {}
|
||||||
|
|
||||||
if storage['arguments']['offline']:
|
if storage['arguments']['offline']:
|
||||||
with pathlib.Path('/etc/pacman.d/mirrorlist').open('r') as fp:
|
with pathlib.Path('/etc/pacman.d/mirrorlist').open('r') as fp:
|
||||||
mirrorlist = fp.read()
|
mirrorlist = fp.read()
|
||||||
else:
|
else:
|
||||||
url = "https://archlinux.org/mirrorlist/?protocol=https&protocol=http&ip_version=4&ip_version=6&use_mirror_status=on"
|
url = "https://archlinux.org/mirrors/status/json/"
|
||||||
try:
|
try:
|
||||||
mirrorlist = fetch_data_from_url(url)
|
mirrorlist = fetch_data_from_url(url)
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
warn(f'Could not fetch an active mirror-list: {err}')
|
warn(f'Could not fetch an active mirror-list: {err}')
|
||||||
return regions
|
return regions
|
||||||
|
|
||||||
regions = _parse_mirror_list(mirrorlist)
|
return _parse_mirror_list(mirrorlist)
|
||||||
sorted_regions = {}
|
|
||||||
for region, urls in regions.items():
|
|
||||||
sorted_regions[region] = sorted(urls, reverse=True)
|
|
||||||
|
|
||||||
return sorted_regions
|
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
import datetime
|
||||||
|
import pydantic
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from typing import (
|
||||||
|
Dict,
|
||||||
|
List
|
||||||
|
)
|
||||||
|
|
||||||
|
from ..networking import ping, DownloadTimer
|
||||||
|
from ..output import info, debug
|
||||||
|
|
||||||
|
|
||||||
|
class MirrorStatusEntryV3(pydantic.BaseModel):
    """
    A single mirror entry of the version 3 mirror status JSON document.

    The public fields mirror the keys of one element of the document's
    ``urls`` list. The underscore-prefixed attributes are private, are not
    part of the validated input, and cache values that are derived from
    ``url`` or measured lazily on first access (``speed`` and ``latency``).
    """
    url: str
    protocol: str
    active: bool
    country: str
    country_code: str
    isos: bool
    ipv4: bool
    ipv6: bool
    details: str
    delay: int | None = None
    last_sync: datetime.datetime | None = None
    duration_avg: float | None = None
    duration_stddev: float | None = None
    completion_pct: float | None = None
    score: int | None = None
    _latency: float | None = None
    _speed: float | None = None
    _hostname: str | None = None
    _port: int | None = None

    @property
    def speed(self) -> float | None:
        """
        Download speed of this mirror in bytes per second.

        The value is measured once, by downloading ``core.db`` from the
        mirror, and cached in ``_speed`` for later accesses. Errors raised by
        ``urllib.request.urlopen`` and the ``DownloadTimeout`` raised by
        ``DownloadTimer`` are not caught here and propagate to the caller.
        """
        if self._speed is None:
            info(f"Checking download speed of {self._hostname}[{self.score}] by fetching: {self.url}core/os/x86_64/core.db")
            req = urllib.request.Request(url=f"{self.url}core/os/x86_64/core.db")

            # The timer only spans the read of the response body; the
            # connection itself is bounded by urlopen's own 5 second timeout.
            with urllib.request.urlopen(req, None, 5) as handle, DownloadTimer(timeout=5) as timer:
                size = len(handle.read())

            self._speed = size / timer.time
            debug(f"    speed: {self._speed} ({int(self._speed / 1024 / 1024 * 100) / 100}MiB/s)")

        return self._speed

    @property
    def latency(self) -> float | None:
        """
        Latency measures the milliseconds between one ICMP request & response.
        It only does so once because we check if self._latency is None, and an ICMP timeout results in -1.
        We do this because some hosts block ICMP so we'll have to rely on .speed instead, which is slower.
        """
        if self._latency is None:
            info(f"Checking latency for {self.url}")
            self._latency = ping(self._hostname, timeout=2)
            debug(f"    latency: {self._latency}")

        return self._latency

    @pydantic.field_validator('score', mode='before')
    @classmethod
    def validate_score(cls, value) -> int | None:
        """
        Round the raw score to an integer before field validation.

        ``None`` is passed through untouched, so entries without a score
        keep ``score`` set to ``None``.
        """
        if value is not None:
            value = round(value)
            debug(f"    score: {value}")

        return value

    @pydantic.model_validator(mode='after')
    def debug_output(self, validation_info) -> 'MirrorStatusEntryV3':
        """
        Derive the private host/port attributes from ``url`` and log the entry.

        Runs in 'after' mode because the private attributes can only be
        assigned once the model instance exists.
        """
        # urlparse handles bracketed IPv6 literals and userinfo, which a plain
        # split of netloc on ':' would get wrong.
        parsed = urllib.parse.urlparse(self.url)
        self._hostname = parsed.hostname
        self._port = parsed.port

        # Compare against None so that a legitimate score of 0 is still reported.
        debug(f"Loaded mirror {self._hostname}" + (f" with current score of {self.score}" if self.score is not None else ''))
        return self
|
||||||
|
|
||||||
|
class MirrorStatusListV3(pydantic.BaseModel):
    """
    Top level of the version 3 mirror status JSON document.

    ``urls`` holds one MirrorStatusEntryV3 per mirror; the remaining fields
    are the document's own metadata keys.
    """
    cutoff: int
    last_check: datetime.datetime
    num_checks: int
    urls: List[MirrorStatusEntryV3]
    version: int

    @pydantic.model_validator(mode='before')
    @classmethod
    def check_model(cls, data: Dict[str, int | datetime.datetime | List[MirrorStatusEntryV3]]) -> Dict[str, int | datetime.datetime | List[MirrorStatusEntryV3]]:
        """
        Reject any input that does not declare itself as version 3.

        Returns the raw input unchanged when it is acceptable, so that normal
        field validation continues. Raises ValueError otherwise, including
        for input that is not a mapping at all.
        """
        # A 'before' validator receives the raw input, which is not
        # guaranteed to be a dict; guard before calling .get() on it.
        if isinstance(data, dict) and data.get('version') == 3:
            return data

        raise ValueError("MirrorStatusListV3 only accepts version 3 data from https://archlinux.org/mirrors/status/json/")
|
||||||
|
|
@ -2,15 +2,68 @@ import os
|
||||||
import socket
|
import socket
|
||||||
import ssl
|
import ssl
|
||||||
import struct
|
import struct
|
||||||
|
import time
|
||||||
|
import select
|
||||||
|
import signal
|
||||||
|
import random
|
||||||
from typing import Union, Dict, Any, List, Optional
|
from typing import Union, Dict, Any, List, Optional
|
||||||
from urllib.error import URLError
|
from urllib.error import URLError
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
|
|
||||||
from .exceptions import SysCallError
|
from .exceptions import SysCallError, DownloadTimeout
|
||||||
from .output import error, info
|
from .output import error, info
|
||||||
from .pacman import Pacman
|
from .pacman import Pacman
|
||||||
|
|
||||||
|
class DownloadTimer():
    """
    Context manager for timing downloads with timeouts.

    While the context is active a SIGALRM based alarm is armed (only when
    ``timeout`` is greater than zero). On exit, the elapsed wall-clock time
    is stored in ``self.time`` and any previously installed SIGALRM handler
    and pending alarm are restored.
    """
    def __init__(self, timeout=5):
        """
        Args:
            timeout:
                The download timeout in seconds. The DownloadTimeout exception
                will be raised in the context after this many seconds.
                A value of zero or less disables the alarm; the context then
                only measures elapsed time.
        """
        self.time = None
        self.start_time = None
        self.timeout = timeout
        self.previous_handler = None
        self.previous_timer = None

    def raise_timeout(self, signl, frame):
        """
        Raise the DownloadTimeout exception.

        Installed as the SIGALRM handler while the context is active.
        """
        raise DownloadTimeout(f'Download timed out after {self.timeout} second(s).')

    def __enter__(self):
        """Arm the alarm (if enabled), record the start time and return self."""
        if self.timeout > 0:
            self.previous_handler = signal.signal(signal.SIGALRM, self.raise_timeout)
            # signal.alarm() returns the seconds left on a previously
            # scheduled alarm (0 if none), which is re-armed on exit.
            self.previous_timer = signal.alarm(self.timeout)

        self.start_time = time.time()
        return self

    def __exit__(self, typ, value, traceback):
        """Store the elapsed time and restore the previous SIGALRM state."""
        if self.start_time is not None:
            time_delta = time.time() - self.start_time
            self.time = time_delta

            # Only touch the process-wide alarm if this context armed one.
            # Cancelling unconditionally would drop an alarm owned by the
            # caller without ever restoring it.
            if self.timeout > 0:
                signal.alarm(0)
                signal.signal(signal.SIGALRM, self.previous_handler)

                previous_timer = self.previous_timer
                if previous_timer and previous_timer > 0:
                    remaining_time = int(previous_timer - time_delta)
                    # The alarm should have been raised during the download.
                    if remaining_time <= 0:
                        signal.raise_signal(signal.SIGALRM)
                    else:
                        signal.alarm(remaining_time)

            self.start_time = None
|
||||||
|
|
||||||
|
|
||||||
def get_hw_addr(ifname :str) -> str:
|
def get_hw_addr(ifname :str) -> str:
|
||||||
import fcntl
|
import fcntl
|
||||||
|
|
@ -81,3 +134,60 @@ def fetch_data_from_url(url: str, params: Optional[Dict] = None) -> str:
|
||||||
return data
|
return data
|
||||||
except URLError:
|
except URLError:
|
||||||
raise ValueError(f'Unable to fetch data from url: {url}')
|
raise ValueError(f'Unable to fetch data from url: {url}')
|
||||||
|
|
||||||
|
|
||||||
|
def calc_checksum(icmp_packet):
    """
    Calculate the ICMP (Internet) checksum of a packet.

    The packet is summed as big-endian 16-bit words, an odd trailing byte
    being padded with a zero low byte. Carries are folded back into the low
    16 bits until none remain, and the one's complement of the result is
    returned.

    Args:
        icmp_packet: The packet as bytes, with its checksum field zeroed.

    Returns:
        The 16-bit checksum as an int.
    """
    checksum = 0
    length = len(icmp_packet)

    for i in range(0, length, 2):
        high = icmp_packet[i]
        low = icmp_packet[i + 1] if i + 1 < length else 0
        checksum += (high << 8) + low

    # A single fold is not enough: adding the carry can itself overflow
    # 16 bits, so keep folding until the sum fits.
    while checksum >> 16:
        checksum = (checksum >> 16) + (checksum & 0xFFFF)

    return ~checksum & 0xFFFF
|
||||||
|
|
||||||
|
def build_icmp(payload):
    """
    Build an ICMP Echo Request packet carrying the given payload.

    The header is packed twice: first with a zero checksum so the checksum
    can be computed over header and payload, then again with the computed
    value in place.

    Args:
        payload: The bytes appended after the 8-byte ICMP header.

    Returns:
        The complete packet as bytes.
    """
    header_format = '!BBHHH'  # type, code, checksum, identifier, sequence
    echo_request, code, identifier, sequence = 8, 0, 0, 1

    unchecked_packet = struct.pack(header_format, echo_request, code, 0, identifier, sequence) + payload
    checksum = calc_checksum(unchecked_packet)

    return struct.pack(header_format, echo_request, code, checksum, identifier, sequence) + payload
|
||||||
|
|
||||||
|
def ping(hostname, timeout=5):
    """
    Send one ICMP Echo Request to hostname and measure the round trip.

    Args:
        hostname: The host to ping.
        timeout: How many seconds to wait for a matching Echo Reply.

    Returns:
        The latency in whole milliseconds, or -1 if no matching reply
        arrived within the timeout or a socket error occurred while waiting.

    Errors raised while creating the socket or sending the request are not
    caught here; the socket and the epoll object are closed in every case.
    """
    started = time.time()
    random_identifier = f'archinstall-{random.randint(1000, 9999)}'.encode()
    latency = -1

    # Create a raw socket (requires root, which should be fine on archiso).
    # Both objects are context managers, so they are released even when
    # registering or sending raises.
    with select.epoll() as watchdog, socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP) as icmp_socket:
        watchdog.register(icmp_socket, select.EPOLLIN | select.EPOLLHUP)

        # Send the ICMP packet
        icmp_socket.sendto(build_icmp(random_identifier), (hostname, 0))

        # Gracefully wait for X amount of time
        # for a ICMP response or exit with no latency
        while latency == -1 and time.time() - started < timeout:
            try:
                for _fileno, _event in watchdog.poll(0.1):
                    response, _address = icmp_socket.recvfrom(1024)

                    # The raw socket delivers the IP header too. Its length
                    # is the low nibble of the first byte, in 32-bit words,
                    # and is more than 20 bytes when IP options are present.
                    ip_header_length = (response[0] & 0x0F) * 4 if response else 0
                    if len(response) <= ip_header_length:
                        continue
                    icmp_type = response[ip_header_length]

                    # Check if it's an Echo Reply (ICMP type 0)
                    if icmp_type == 0 and response[-len(random_identifier):] == random_identifier:
                        latency = round((time.time() - started) * 1000)
                        break
            except socket.error as err:
                print(f"Error: {err}")
                break

    return latency
|
||||||
|
|
@ -20,6 +20,7 @@ classifiers = [
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"simple-term-menu==1.6.4",
|
"simple-term-menu==1.6.4",
|
||||||
"pyparted @ https://github.com//dcantrell/pyparted/archive/v3.13.0.tar.gz#sha512=26819e28d73420937874f52fda03eb50ab1b136574ea9867a69d46ae4976d38c4f26a2697fa70597eed90dd78a5ea209bafcc3227a17a7a5d63cff6d107c2b11",
|
"pyparted @ https://github.com//dcantrell/pyparted/archive/v3.13.0.tar.gz#sha512=26819e28d73420937874f52fda03eb50ab1b136574ea9867a69d46ae4976d38c4f26a2697fa70597eed90dd78a5ea209bafcc3227a17a7a5d63cff6d107c2b11",
|
||||||
|
"pydantic==2.8.2"
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue