fix: harden find-api-calls.sh and recover-kotlin-names.sh from PR #16 review

- find-api-calls.sh: add missing '|| true' on the --paths inventory and
  --urls extraction pipelines; with set -euo pipefail a zero-match grep
  aborted the whole script (including the default --all run) with exit 1.
- find-api-calls.sh: widen STRICT_URL to also match IPv4 literals, apex
  2-label domains and bare single-label hosts followed by :port or /path
  (localhost, internal backends) while still rejecting dictionary-fragment
  noise from the Kotlin stdlib.
- recover-kotlin-names.sh: sanitize the by_package/ filename with
  os.path.basename; a crafted absolute path in untrusted @DebugMetadata
  package names could otherwise escape the output directory.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Simone Avogadro 2026-06-10 10:22:16 +02:00
parent a2a0a97f23
commit 2047f99d01
2 changed files with 12 additions and 5 deletions

View File

@ -202,7 +202,7 @@ if [[ "$SEARCH_ALL" == true || "$SEARCH_PATHS" == true ]]; then
# Print a flat unique list rather than file:line — this is the inventory. # Print a flat unique list rather than file:line — this is the inventory.
grep -rhoE --include='*.java' --include='*.kt' "$PATHS_REGEX" "$SOURCE_DIR" 2>/dev/null \ grep -rhoE --include='*.java' --include='*.kt' "$PATHS_REGEX" "$SOURCE_DIR" 2>/dev/null \
| grep -Ev "$EXCLUDE" \ | grep -Ev "$EXCLUDE" \
| sort -u | sort -u || true
echo echo
section "Endpoint-Shaped Path Literals — call sites" section "Endpoint-Shaped Path Literals — call sites"
grep $GREP_OPTS -E "$PATHS_REGEX" "$SOURCE_DIR" 2>/dev/null \ grep $GREP_OPTS -E "$PATHS_REGEX" "$SOURCE_DIR" 2>/dev/null \
@ -218,12 +218,19 @@ fi
if [[ "$SEARCH_ALL" == true || "$SEARCH_URLS" == true ]]; then if [[ "$SEARCH_ALL" == true || "$SEARCH_URLS" == true ]]; then
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DENYLIST="$HERE/../references/third_party_hosts.txt" DENYLIST="$HERE/../references/third_party_hosts.txt"
# Hostname must have at least one dot and end in a 2+ letter TLD. # Accept three host shapes, all rejecting whitespace / angle brackets /
STRICT_URL='https?://[A-Za-z0-9-]+(\.[A-Za-z0-9-]+)+\.[A-Za-z]{2,}(:[0-9]{1,5})?(/[^"<>[:space:]]*)?' # double quotes in the path:
# * IPv4 literal (dev/staging endpoints, high signal), e.g. 192.168.0.1
# * dotted host: >=2 labels ending in a 2+ letter TLD (incl. apex), e.g. example.com
# * bare single-label host, BUT only when followed by ':port' (e.g. localhost:3000)
# or '/path' (e.g. svc/health) — keeps internal hosts (localhost, internal-backend)
# while still dropping Kotlin-stdlib dictionary fragments like
# "http://An Introduction..." (bare word, no port/path follows).
STRICT_URL='https?://(([0-9]{1,3}(\.[0-9]{1,3}){3}|[A-Za-z0-9-]+(\.[A-Za-z0-9-]+)*\.[A-Za-z]{2,})(:[0-9]{1,5})?(/[^"<>[:space:]]*)?|[A-Za-z0-9-]+(:[0-9]{1,5}(/[^"<>[:space:]]*)?|/[^"<>[:space:]]*))'
TMP="$(mktemp)" TMP="$(mktemp)"
trap 'rm -f "$TMP"' EXIT trap 'rm -f "$TMP"' EXIT
grep -rhoE --include='*.java' --include='*.kt' "$STRICT_URL" "$SOURCE_DIR" 2>/dev/null \ { grep -rhoE --include='*.java' --include='*.kt' "$STRICT_URL" "$SOURCE_DIR" 2>/dev/null || true; } \
| sort -u > "$TMP" | sort -u > "$TMP"
# Extract host: strip scheme, take part up to first ':' or '/'. # Extract host: strip scheme, take part up to first ':' or '/'.

View File

@ -127,7 +127,7 @@ for obf, real in mapping.items():
by_pkg[pkg].append((real, obf, file_real[obf])) by_pkg[pkg].append((real, obf, file_real[obf]))
for pkg, rows in by_pkg.items(): for pkg, rows in by_pkg.items():
safe = pkg.replace(".", "_") or "default" safe = os.path.basename(pkg).replace(".", "_") or "default"
with open(os.path.join(OUT, "by_package", f"{safe}.txt"), "w") as f: with open(os.path.join(OUT, "by_package", f"{safe}.txt"), "w") as f:
for real, obf, p in sorted(rows): for real, obf, p in sorted(rows):
f.write(f"{real}\t{obf}\t{p}\n") f.write(f"{real}\t{obf}\t{p}\n")