fix: harden find-api-calls.sh and recover-kotlin-names.sh from PR #16 review
- find-api-calls.sh: add missing '|| true' on the --paths inventory and --urls extraction pipelines; with set -euo pipefail a zero-match grep aborted the whole script (including the default --all run) with exit 1.
- find-api-calls.sh: widen STRICT_URL to also match IPv4 literals, apex 2-label domains, and bare single-label hosts followed by :port or /path (localhost, internal backends) while still rejecting dictionary-fragment noise from the Kotlin stdlib.
- recover-kotlin-names.sh: sanitize the by_package/ filename with os.path.basename; a crafted absolute path in untrusted @DebugMetadata package names could otherwise escape the output directory.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
a2a0a97f23
commit
2047f99d01
|
|
@ -202,7 +202,7 @@ if [[ "$SEARCH_ALL" == true || "$SEARCH_PATHS" == true ]]; then
|
||||||
# Print a flat unique list rather than file:line — this is the inventory.
|
# Print a flat unique list rather than file:line — this is the inventory.
|
||||||
grep -rhoE --include='*.java' --include='*.kt' "$PATHS_REGEX" "$SOURCE_DIR" 2>/dev/null \
|
grep -rhoE --include='*.java' --include='*.kt' "$PATHS_REGEX" "$SOURCE_DIR" 2>/dev/null \
|
||||||
| grep -Ev "$EXCLUDE" \
|
| grep -Ev "$EXCLUDE" \
|
||||||
| sort -u
|
| sort -u || true
|
||||||
echo
|
echo
|
||||||
section "Endpoint-Shaped Path Literals — call sites"
|
section "Endpoint-Shaped Path Literals — call sites"
|
||||||
grep $GREP_OPTS -E "$PATHS_REGEX" "$SOURCE_DIR" 2>/dev/null \
|
grep $GREP_OPTS -E "$PATHS_REGEX" "$SOURCE_DIR" 2>/dev/null \
|
||||||
|
|
@ -218,12 +218,19 @@ fi
|
||||||
if [[ "$SEARCH_ALL" == true || "$SEARCH_URLS" == true ]]; then
|
if [[ "$SEARCH_ALL" == true || "$SEARCH_URLS" == true ]]; then
|
||||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
DENYLIST="$HERE/../references/third_party_hosts.txt"
|
DENYLIST="$HERE/../references/third_party_hosts.txt"
|
||||||
# Hostname must have at least one dot and end in a 2+ letter TLD.
|
# Accept three host shapes, all rejecting whitespace / angle brackets /
|
||||||
STRICT_URL='https?://[A-Za-z0-9-]+(\.[A-Za-z0-9-]+)+\.[A-Za-z]{2,}(:[0-9]{1,5})?(/[^"<>[:space:]]*)?'
|
# double quotes in the path:
|
||||||
|
# * IPv4 literal (dev/staging endpoints, high signal) 192.168.0.1
|
||||||
|
# * dotted host: >=2 labels ending in a 2+ letter TLD (incl apex) example.com
|
||||||
|
# * bare single-label host, BUT only when followed by ':port' or localhost:3000
|
||||||
|
# '/path' — keeps internal hosts (localhost, internal-backend) svc/health
|
||||||
|
# while still dropping Kotlin-stdlib dictionary fragments like
|
||||||
|
# "http://An Introduction..." (bare word, no port/path follows).
|
||||||
|
STRICT_URL='https?://(([0-9]{1,3}(\.[0-9]{1,3}){3}|[A-Za-z0-9-]+(\.[A-Za-z0-9-]+)*\.[A-Za-z]{2,})(:[0-9]{1,5})?(/[^"<>[:space:]]*)?|[A-Za-z0-9-]+(:[0-9]{1,5}(/[^"<>[:space:]]*)?|/[^"<>[:space:]]*))'
|
||||||
|
|
||||||
TMP="$(mktemp)"
|
TMP="$(mktemp)"
|
||||||
trap 'rm -f "$TMP"' EXIT
|
trap 'rm -f "$TMP"' EXIT
|
||||||
grep -rhoE --include='*.java' --include='*.kt' "$STRICT_URL" "$SOURCE_DIR" 2>/dev/null \
|
{ grep -rhoE --include='*.java' --include='*.kt' "$STRICT_URL" "$SOURCE_DIR" 2>/dev/null || true; } \
|
||||||
| sort -u > "$TMP"
|
| sort -u > "$TMP"
|
||||||
|
|
||||||
# Extract host: strip scheme, take part up to first ':' or '/'.
|
# Extract host: strip scheme, take part up to first ':' or '/'.
|
||||||
|
|
|
||||||
|
|
@ -127,7 +127,7 @@ for obf, real in mapping.items():
|
||||||
by_pkg[pkg].append((real, obf, file_real[obf]))
|
by_pkg[pkg].append((real, obf, file_real[obf]))
|
||||||
|
|
||||||
for pkg, rows in by_pkg.items():
|
for pkg, rows in by_pkg.items():
|
||||||
safe = pkg.replace(".", "_") or "default"
|
safe = os.path.basename(pkg).replace(".", "_") or "default"
|
||||||
with open(os.path.join(OUT, "by_package", f"{safe}.txt"), "w") as f:
|
with open(os.path.join(OUT, "by_package", f"{safe}.txt"), "w") as f:
|
||||||
for real, obf, p in sorted(rows):
|
for real, obf, p in sorted(rows):
|
||||||
f.write(f"{real}\t{obf}\t{p}\n")
|
f.write(f"{real}\t{obf}\t{p}\n")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue