# Source code for kspecdr.fluxcal.download_bosz

"""
Download BOSZ 2024 stellar template subgrid for kspecdr flux calibration.

Usage
-----
From the repository root::

    python -m kspecdr.fluxcal.download_bosz            # full download
    python -m kspecdr.fluxcal.download_bosz --dry-run  # preview file list only
    python -m kspecdr.fluxcal.download_bosz --force    # re-download existing files

Subgrid definition (F-type spectrophotometric standards)
---------------------------------------------------------
- Resolution: R = 10,000
- Teff: 5000-8000 K, step 250 K (13 values)
- log(g): 3.5, 4.0, 4.5, 5.0
- [M/H]: -1.00 to +0.50, step 0.25 (7 values)
- [alpha/M]: +0.00 (all [M/H]); additionally +0.25 for [M/H] <= -0.50
- [C/M]: +0.00
- vmicro: 1 km/s
- atmos: mp (MARCS plane-parallel) for Teff 5000-7250 K,
  ap (ATLAS9 plane-parallel) for Teff 7500-8000 K

Output
------
Files are saved to::

    <repo>/data/templates/bosz2024/r10000/<metallicity>/<filename>.txt.gz

The shared wavelength grid is saved to::

    <repo>/data/templates/bosz2024/bosz2024_wave_r10000.txt

References
----------
- Meszaros et al. 2024: The updated BOSZ synthetic stellar spectral library
- MAST HLSP BOSZ: https://archive.stsci.edu/hlsp/bosz
"""

from __future__ import annotations

import sys
import time
import urllib.error
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------

# This module lives at src/kspecdr/fluxcal/download_bosz.py, so the repository
# root is four levels up: fluxcal/ -> kspecdr/ -> src/ -> <repo>.
_REPO_ROOT = Path(__file__).parent.parent.parent.parent
# Every downloaded template (and the shared wavelength grid) goes below here.
TEMPLATE_ROOT = _REPO_ROOT.joinpath("data", "templates", "bosz2024")

# ---------------------------------------------------------------------------
# Subgrid definition
# ---------------------------------------------------------------------------

# Remote location of the BOSZ 2024 library and the resolution sub-directory
# that is mirrored locally.
BASE_URL = "https://archive.stsci.edu/hlsps/bosz/bosz2024"
RESOLUTION = "r10000"

# Stellar-parameter axes of the subgrid.
TEFF_LIST = [*range(5000, 8001, 250)]  # 5000–8000 K inclusive, 250 K steps
LOGG_LIST = [3.5, 4.0, 4.5, 5.0]
FEH_LIST = [-1.00, -0.75, -0.50, -0.25, 0.00, 0.25, 0.50]
VMICRO = 1      # single microturbulence value used in the filenames
CARBON = 0.00   # single [C/M] value used in the filenames

# Download behaviour.
MAX_WORKERS = 8     # size of the download thread pool
RETRY_MAX = 3       # attempts per file before giving up
RETRY_DELAY = 2.0   # seconds to wait between attempts


def alpha_values_for(feh: float) -> list[float]:
    """Return the [α/M] values to include for a given [M/H].

    Parameters
    ----------
    feh
        Metallicity [M/H] of the grid point.

    Returns
    -------
    list[float]
        ``[0.00, 0.25]`` when ``feh <= -0.50`` (the α-enhanced models are
        added for the metal-poor part of the grid), otherwise ``[0.00]``.
        A fresh list is returned on every call.
    """
    if feh <= -0.50:
        return [0.00, 0.25]
    return [0.00]
def atmos_for(teff: int) -> str:
    """Return the atmosphere-model code used in filenames for *teff*.

    Parameters
    ----------
    teff
        Effective temperature in kelvin.

    Returns
    -------
    str
        ``"ap"`` (ATLAS9 plane-parallel) for ``teff >= 7500``,
        ``"mp"`` (MARCS plane-parallel) otherwise.
    """
    return "ap" if teff >= 7500 else "mp"
# ---------------------------------------------------------------------------
# Filename / URL helpers
# ---------------------------------------------------------------------------

def _fmt_logg(v: float) -> str:
    """Format log(g) as a filename token, e.g. ``4.0`` -> ``"g+4.0"``.

    The sign comes from the format spec (like the sibling formatters) so a
    negative value yields ``"g-0.5"`` rather than the malformed ``"g+-0.5"``.
    """
    return f"g{v:+.1f}"


def _fmt_feh(v: float) -> str:
    """Format [M/H] as a signed two-decimal token, e.g. ``-0.5`` -> ``"m-0.50"``."""
    return f"m{v:+.2f}"


def _fmt_alpha(v: float) -> str:
    """Format [α/M] as a signed two-decimal token, e.g. ``0.25`` -> ``"a+0.25"``."""
    return f"a{v:+.2f}"


def _fmt_carbon(v: float) -> str:
    """Format [C/M] as a signed two-decimal token, e.g. ``0.0`` -> ``"c+0.00"``."""
    return f"c{v:+.2f}"


def _fmt_teff(v: int) -> str:
    """Format Teff as an integer token, e.g. ``5000`` -> ``"t5000"``."""
    return f"t{v:d}"
def build_filename(
    atmos: str,
    teff: int,
    logg: float,
    feh: float,
    alpha: float,
    carbon: float,
    vmicro: int,
    resolution: str,
) -> str:
    """Assemble the BOSZ 2024 filename for one model spectrum.

    Parameters
    ----------
    atmos
        Atmosphere-model code (``"mp"`` or ``"ap"`` in this module).
    teff, logg, feh, alpha, carbon
        Stellar parameters; each is rendered by its ``_fmt_*`` helper.
    vmicro
        Microturbulence value, inserted verbatim after ``v``.
    resolution
        Resolution token such as ``"r10000"``, inserted verbatim.

    Returns
    -------
    str
        A name of the form
        ``bosz2024_<atmos>_t<teff>_g<logg>_m<feh>_a<alpha>_c<carbon>_v<vmicro>_<resolution>_resam.txt.gz``.
    """
    return (
        f"bosz2024_{atmos}_{_fmt_teff(teff)}_{_fmt_logg(logg)}"
        f"_{_fmt_feh(feh)}_{_fmt_alpha(alpha)}_{_fmt_carbon(carbon)}"
        f"_v{vmicro}_{resolution}_resam.txt.gz"
    )
def build_url(feh: float, filename: str) -> str:
    """Return the download URL for *filename*.

    The remote layout is ``<BASE_URL>/<RESOLUTION>/<metallicity>/<filename>``,
    where the metallicity directory is the ``_fmt_feh`` token for *feh*.
    """
    return f"{BASE_URL}/{RESOLUTION}/{_fmt_feh(feh)}/{filename}"
# ---------------------------------------------------------------------------
# Download worker
# ---------------------------------------------------------------------------

def _download_one(
    url: str,
    dest: Path,
    skip_existing: bool = True,
) -> tuple[str, Path, str]:
    """Download *url* to *dest*, retrying transient failures.

    The payload is first written to a sibling ``<name>.part`` file and then
    renamed onto *dest*. An interrupted write therefore never leaves a
    truncated, non-empty *dest* that a later run would mistake for a finished
    download and skip.

    Parameters
    ----------
    url
        Address to fetch.
    dest
        Final location of the file; its parent directory is created if needed.
    skip_existing
        When true, a non-empty *dest* is left untouched and reported as
        ``'skipped'``.

    Returns
    -------
    (url, dest, status)
        where status ∈ {'ok', 'skipped', 'not_found', 'error:<msg>'}.
        HTTP 404 is reported immediately as ``'not_found'``; any other failure
        is retried up to ``RETRY_MAX`` attempts with ``RETRY_DELAY`` seconds
        between attempts.
    """
    if skip_existing and dest.exists() and dest.stat().st_size > 0:
        return url, dest, "skipped"

    dest.parent.mkdir(parents=True, exist_ok=True)
    partial = dest.with_name(dest.name + ".part")

    # Reported only if the loop never runs (RETRY_MAX < 1).
    status = "error:exhausted"
    for attempt in range(1, RETRY_MAX + 1):
        try:
            with urllib.request.urlopen(url, timeout=60) as resp:
                payload = resp.read()
            partial.write_bytes(payload)
            # Path.replace overwrites an existing destination in one step.
            partial.replace(dest)
            return url, dest, "ok"
        except urllib.error.HTTPError as exc:
            if exc.code == 404:
                # A missing file will not appear on retry.
                return url, dest, "not_found"
            status = f"error:HTTP{exc.code}"
        except Exception as exc:  # noqa: BLE001 - reported via the status string
            status = f"error:{exc}"

        # Best-effort removal of a half-written temp file before retrying.
        try:
            partial.unlink(missing_ok=True)
        except OSError:
            pass
        if attempt < RETRY_MAX:
            time.sleep(RETRY_DELAY)

    return url, dest, status


# ---------------------------------------------------------------------------
# Task list
# ---------------------------------------------------------------------------
def build_task_list() -> list[tuple[str, Path]]:
    """Enumerate every ``(url, destination)`` pair of the subgrid.

    Iterates Teff (outermost), log g, [M/H] and the [α/M] values allowed for
    that [M/H] (innermost), with ``CARBON``, ``VMICRO`` and ``RESOLUTION``
    held fixed. Each destination is
    ``TEMPLATE_ROOT/<RESOLUTION>/<metallicity>/<filename>``, mirroring the
    remote directory layout.
    """
    tasks: list[tuple[str, Path]] = []
    for teff in TEFF_LIST:
        atmos = atmos_for(teff)
        for logg in LOGG_LIST:
            for feh in FEH_LIST:
                for alpha in alpha_values_for(feh):
                    fname = build_filename(
                        atmos, teff, logg, feh, alpha, CARBON, VMICRO, RESOLUTION
                    )
                    url = build_url(feh, fname)
                    dest = TEMPLATE_ROOT / RESOLUTION / _fmt_feh(feh) / fname
                    tasks.append((url, dest))
    return tasks
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
def main(dry_run: bool = False, skip_existing: bool = True) -> None:
    """Download the wavelength grid and every model spectrum of the subgrid.

    Parameters
    ----------
    dry_run
        Only print the subgrid summary and a preview of the URLs; nothing is
        downloaded and no directory is created.
    skip_existing
        Keep files that already exist and are non-empty. When false, both the
        model spectra and the wavelength grid are fetched again.
    """
    if not dry_run:
        TEMPLATE_ROOT.mkdir(parents=True, exist_ok=True)

    # --- wavelength grid (shared across all models at this resolution) ---
    wave_url = f"{BASE_URL}/wavelength_grids/bosz2024_wave_{RESOLUTION}.txt"
    wave_dest = TEMPLATE_ROOT / f"bosz2024_wave_{RESOLUTION}.txt"
    wave_present = wave_dest.exists() and wave_dest.stat().st_size > 0
    if skip_existing and wave_present:
        print(f" wavelength grid : already exists — {wave_dest.name}")
    else:
        print(f" wavelength grid : {wave_dest.relative_to(_REPO_ROOT)}")
        if not dry_run:
            _, _, status = _download_one(wave_url, wave_dest, skip_existing=False)
            print(f" → {status}")

    # --- model spectra ---
    tasks = build_task_list()

    print("\nSubgrid summary")
    print(f" Teff : {TEFF_LIST[0]}–{TEFF_LIST[-1]} K, step 250 ({len(TEFF_LIST)} values)")
    print(f" logg : {LOGG_LIST}")
    print(f" [M/H] : {[f'{f:+.2f}' for f in FEH_LIST]}")
    print(" [α/M] : 0.00 for all; +0.25 additionally for [M/H] ≤ −0.50")
    print(" atmos : mp (Teff 5000–7250 K) / ap (Teff 7500–8000 K)")
    print(f" Total : {len(tasks)} model files")
    print(f" Dest : {TEMPLATE_ROOT.relative_to(_REPO_ROOT)}/")

    if dry_run:
        preview = tasks[:8]
        print(f"\nDry run — first {len(preview)} URLs:")
        for url, _ in preview:
            print(f" {url}")
        remaining = len(tasks) - len(preview)
        # Never report a negative remainder for a grid smaller than the preview.
        if remaining > 0:
            print(f" ... ({remaining} more)")
        return

    print()
    counts: dict[str, int] = {"ok": 0, "skipped": 0, "not_found": 0, "error": 0}

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        futures = {
            pool.submit(_download_one, url, dest, skip_existing): dest
            for url, dest in tasks
        }
        for i, future in enumerate(as_completed(futures), 1):
            dest = futures[future]
            try:
                _, _, status = future.result()
            except Exception as exc:  # noqa: BLE001 - one bad task must not abort the run
                status = f"error:{exc}"
            # Every 'error:<msg>' variant is tallied under the single 'error' key.
            key = status if status in counts else "error"
            counts[key] += 1
            print(f" [{i:>4}/{len(tasks)}] {status:<16} {dest.name}", flush=True)

    print("\n--- Summary ---")
    for k, v in counts.items():
        print(f" {k:>10}: {v}")

    if counts["not_found"]:
        print(
            "\n Note: 'not_found' entries are expected for (Teff, logg) combinations\n"
            " that lie outside the BOSZ grid boundaries."
        )
if __name__ == "__main__":
    # argparse rejects unknown or mistyped flags (e.g. ``--dryrun``) instead of
    # silently ignoring them and starting a full download.
    import argparse

    _parser = argparse.ArgumentParser(
        prog="python -m kspecdr.fluxcal.download_bosz",
        description="Download the BOSZ 2024 stellar template subgrid.",
    )
    _parser.add_argument(
        "--dry-run",
        action="store_true",
        help="preview the file list only; download nothing",
    )
    _parser.add_argument(
        "--force",
        action="store_true",
        help="re-download files that already exist",
    )
    _args = _parser.parse_args(sys.argv[1:])
    main(dry_run=_args.dry_run, skip_existing=not _args.force)