diff --git a/README.rst b/README.rst
index 21d2af2..74e2909 100644
--- a/README.rst
+++ b/README.rst
@@ -193,6 +193,16 @@ keys:
         under which the release's assets will be saved. If this is not
         specified, no release assets will be downloaded.
 
+    ``packages``
+        A template string that will be instantiated for each package
+        version (from GitHub Container Registry) to produce the path
+        for the directory (relative to the current working directory)
+        under which the package version metadata and manifest will be
+        saved as JSON files. For container packages, this includes
+        both the package metadata (tags, version info) and the
+        OCI/Docker manifest. If this is not specified, no package
+        data will be saved.
+
 ``workflows``
     A specification of the workflows for which to retrieve assets.
     This can be either a list of workflow basenames, including the file
@@ -221,6 +231,33 @@ keys:
     When ``workflows`` is not specified, assets are retrieved for all
     workflows in the repository.
 
+``packages``
+    A specification of the packages for which to retrieve metadata
+    and manifests. This can be either a list of package names or a
+    mapping containing the following fields:
+
+    ``include``
+        A list of packages to retrieve, given as either names or
+        (when ``regex`` is true) `Python regular expressions`_ to
+        match against package names. If ``include`` is omitted, it
+        defaults to including all packages.
+
+    ``exclude``
+        A list of packages to not retrieve, given as either names or
+        (when ``regex`` is true) `Python regular expressions`_ to
+        match against package names. If ``exclude`` is omitted, no
+        packages are excluded. Packages that match both ``include``
+        and ``exclude`` are excluded.
+
+    ``regex``
+        A boolean. If true (default false), the elements of the
+        ``include`` and ``exclude`` fields are treated as `Python
+        regular expressions`_ that are matched (unanchored) against
+        package names; if false, they are used as exact names.
+
+    When ``packages`` is not specified, metadata and manifests are
+    retrieved for all packages in the repository.
+
 ``travis``
     Configuration for retrieving logs from Travis-CI.com. Subfield:
 
@@ -399,10 +436,13 @@ A sample config file:
           logs: '{build_prefix}/{wf_name}/{number}/logs/'
           artifacts: '{build_prefix}/{wf_name}/{number}/artifacts/'
           releases: '{path_prefix}/{release_tag}/'
+          packages: '{year}/{month}/{ci}/packages/{package_name}/{tag}/'
         workflows:
           - test_crippled.yml
           - test_extensions.yml
           - test_macos.yml
+        packages:
+          - tinuous-inception
       travis:
         paths:
           logs: '{build_prefix}/{number}/{job}.txt'
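For instance, under the scheme documented above, a configuration like the
following (package names invented) would fetch every container package
matching ``tinuous-.*`` except those matching ``.*-test``:

    ci:
      github:
        paths:
          packages: '{year}/{month}/packages/{package_name}/{tag}/'
        packages:
          include:
            - 'tinuous-.*'
          exclude:
            - '.*-test'
          regex: true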
@@ -432,9 +472,10 @@ A sample config file:
 Path Templates
 --------------
 
-The path at which assets for a given workflow run, build job, or release are
-saved is determined by instantiating the appropriate path template string given
-in the configuration file for the corresponding CI system. A template string
+The path at which assets for a given workflow run, build job, release, or
+package version are saved is determined by instantiating the appropriate path
+template string given in the configuration file for the corresponding CI
+system. A template string
 is a filepath containing placeholders of the form ``{field}``, where the
 available placeholders are:
@@ -468,7 +509,7 @@ Placeholder Definition
                         ``appveyor``, or ``circleci``)
 ``{type}``              The event type that triggered the build (``cron``,
                         ``manual``, ``pr``, or ``push``), or ``release`` for
-                        GitHub releases
+                        GitHub releases, or ``package`` for GitHub Packages
 ``{type_id}``           Further information on the triggering event; for
                         ``cron`` and ``manual``, this is a timestamp for the
                         start of the build; for ``pr``, this is the number of
@@ -519,6 +560,18 @@ Placeholder Definition
 ``{step_name}``         *(CircleCI only)* The escaped [1]_ name of the step [2]_
 ``{index}``             *(CircleCI only)* The index of the parallel container
                         that the step ran on [2]_
+``{package_name}``      *(``packages`` path only)* The escaped [1]_ name of the
+                        package [3]_
+``{package_type}``      *(``packages`` path only)* The type of the package
+                        (e.g., ``container``) [3]_
+``{version_id}``        *(``packages`` path only)* The unique ID of the package
+                        version [3]_
+``{version_name}``      *(``packages`` path only)* The escaped [1]_ name/digest
+                        of the package version [3]_
+``{tag}``               *(``packages`` path only)* The primary tag of the
+                        package version, or the version name if untagged [3]_
+``{tags}``              *(``packages`` path only)* Comma-separated list of all
+                        tags for the package version [3]_
 ====================== =======================================================
 
 .. _datetime: https://docs.python.org/3/library/datetime.html#datetime-objects
@@ -529,7 +582,9 @@ Placeholder Definition
        replacing each whitespace character with a space.
 
 .. [2] These placeholders are only available for ``path`` and
-       ``artifacts_path``, not ``releases_path``
+       ``artifacts_path``, not ``releases_path`` or the ``packages`` path
+
+.. [3] These placeholders are only available for the ``packages`` path
 
 A placeholder's value may be truncated to the first ``n`` characters by writing
 ``{placeholder[:n]}``, e.g., ``{commit[:7]}``.
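To make the new placeholders concrete: with the sample ``packages`` template
above and an invented version of ``tinuous-inception`` tagged ``latest`` and
pushed 2023-05-07 UTC, the instantiated path would look like this (a
simplified sketch — tinuous's own expansion also handles ``{placeholder[:n]}``
truncation and escaping, which plain ``str.format()`` does not):

    template = "{year}/{month}/{ci}/packages/{package_name}/{tag}/"
    fields = {
        "year": "2023",
        "month": "05",
        "ci": "github",
        "package_name": "tinuous-inception",
        "tag": "latest",
    }
    print(template.format(**fields))
    # -> 2023/05/github/packages/tinuous-inception/latest/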
diff --git a/src/tinuous/__main__.py b/src/tinuous/__main__.py
index d7669f0..f61193f 100644
--- a/src/tinuous/__main__.py
+++ b/src/tinuous/__main__.py
@@ -104,8 +104,13 @@ def fetch(config_file: str, state_path: Optional[str], sanitize_secrets: bool) -
     logs_added = 0
     artifacts_added = 0
     relassets_added = 0
+    packages_added = 0
     for name, cicfg in cfg.ci.items():
-        if not cicfg.gets_builds() and not cicfg.gets_releases():
+        if (
+            not cicfg.gets_builds()
+            and not cicfg.gets_releases()
+            and not cicfg.gets_packages()
+        ):
             log.info("No paths configured for %s; skipping", name)
             continue
         log.info("Fetching resources from %s", name)
@@ -143,23 +148,37 @@ def fetch(config_file: str, state_path: Optional[str], sanitize_secrets: bool) -
             assert isinstance(cicfg.paths, GHPathsDict)
             releases_path = cicfg.paths.releases
             assert releases_path is not None
-            for asset in ci.get_release_assets():
-                path = asset.expand_path(releases_path, cfg.vars)
+            for rel_asset in ci.get_release_assets():
+                path = rel_asset.expand_path(releases_path, cfg.vars)
                 if cfg.datalad.enabled:
                     ensure_datalad(ds, path, cfg.datalad.cfg_proc)
-                paths = asset.download(Path(path))
+                paths = rel_asset.download(Path(path))
                 relassets_added += len(paths)
+        if cicfg.gets_packages():
+            assert isinstance(ci, GitHubActions)
+            assert isinstance(cicfg.paths, GHPathsDict)
+            packages_path = cicfg.paths.packages
+            assert packages_path is not None
+            for pkg_asset in ci.get_package_assets():
+                path = pkg_asset.expand_path(packages_path, cfg.vars)
+                if cfg.datalad.enabled:
+                    ensure_datalad(ds, path, cfg.datalad.cfg_proc)
+                paths = pkg_asset.download(Path(path))
+                packages_added += len(paths)
         statefile.set_since(name, ci.new_since())
     log.info("%d logs downloaded", logs_added)
     log.info("%d artifacts downloaded", artifacts_added)
     log.info("%d release assets downloaded", relassets_added)
+    log.info("%d package files downloaded", packages_added)
     if cfg.datalad.enabled:
-        if logs_added or artifacts_added or relassets_added:
+        if logs_added or artifacts_added or relassets_added or packages_added:
             msg = f"[tinuous] {logs_added} logs added"
             if artifacts_added:
                 msg += f", {artifacts_added} artifacts added"
             if relassets_added:
                 msg += f", {relassets_added} release assets added"
+            if packages_added:
+                msg += f", {packages_added} package files added"
             msg += f"\n\nProduced by tinuous {__version__}"
             ds.save(recursive=True, message=msg)
     elif statefile.modified:
diff --git a/src/tinuous/config.py b/src/tinuous/config.py
index bcd1054..db4a5f1 100644
--- a/src/tinuous/config.py
+++ b/src/tinuous/config.py
@@ -25,10 +25,14 @@ def gets_builds(self) -> bool:
     def gets_releases(self) -> bool:
         return False
 
+    def gets_packages(self) -> bool:
+        return False
+
 
 class GHPathsDict(PathsDict):
     artifacts: Optional[str] = None
     releases: Optional[str] = None
+    packages: Optional[str] = None
 
     def gets_builds(self) -> bool:
         #
@@ -37,6 +41,9 @@ def gets_builds(self) -> bool:
     def gets_releases(self) -> bool:
         return self.releases is not None
 
+    def gets_packages(self) -> bool:
+        return self.packages is not None
+
 
 class CCIPathsDict(PathsDict):
     artifacts: Optional[str] = None
@@ -70,10 +77,14 @@ def gets_builds(self) -> bool:
     def gets_releases(self) -> bool:
         return self.paths.gets_releases()
 
+    def gets_packages(self) -> bool:
+        return self.paths.gets_packages()
+
 
 class GitHubConfig(CIConfig):
     paths: GHPathsDict = Field(default_factory=GHPathsDict)
     workflows: GHWorkflowSpec = Field(default_factory=GHWorkflowSpec)
+    packages: WorkflowSpec = Field(default_factory=WorkflowSpec)
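+    # Like ``workflows``, ``packages`` may be given in the config file as
+    # a bare list of names; the _package_list validator below normalizes,
+    # e.g., ["tinuous-inception"] into {"include": ["tinuous-inception"]}.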
@field_validator("workflows", mode="before") @classmethod @@ -83,6 +94,14 @@ def _workflow_list(cls, v: Any) -> Any: else: return v + @field_validator("packages", mode="before") + @classmethod + def _package_list(cls, v: Any) -> Any: + if isinstance(v, list): + return {"include": v} + else: + return v + @staticmethod def get_auth_tokens() -> dict[str, str]: return GitHubActions.get_auth_tokens() @@ -100,6 +119,7 @@ def get_system( until=until, token=tokens["github"], workflow_spec=self.workflows, + package_spec=self.packages, ) diff --git a/src/tinuous/github.py b/src/tinuous/github.py index 567b57d..7ddfc79 100644 --- a/src/tinuous/github.py +++ b/src/tinuous/github.py @@ -3,6 +3,8 @@ from collections.abc import Iterator from datetime import datetime, timezone from functools import cached_property +import hashlib +import json from pathlib import Path import re from typing import Any, Dict, List, Optional @@ -19,12 +21,14 @@ CISystem, EventType, GHWorkflowSpec, + WorkflowSpec, ) from .util import expand_template, get_github_token, iterfiles, log, sanitize_pathname class GitHubActions(CISystem): workflow_spec: GHWorkflowSpec + package_spec: Optional["WorkflowSpec"] = None hash2pr: Dict[str, str] = Field(default_factory=dict) @staticmethod @@ -264,6 +268,77 @@ def get_release_assets(self) -> Iterator[GHReleaseAsset]: download_url=asset.browser_download_url, ) + def get_packages(self) -> Iterator[Package]: + # First get the owner type (user or org) from the repo name + owner = self.repo.split("/")[0] + # Try organization packages first, fall back to user packages + for endpoint in [ + f"/orgs/{owner}/packages", + f"/users/{owner}/packages", + ]: + try: + params = {"package_type": "container"} + for item in self.paginate(endpoint, params=params): + yield Package.model_validate(item) + return # Successfully fetched packages + except requests.HTTPError as e: + if e.response is not None and e.response.status_code == 404: + continue # Try next endpoint + raise + + def get_package_versions(self, package: Package) -> Iterator[PackageVersion]: + owner = self.repo.split("/")[0] + # Try organization packages first, fall back to user packages + pkg_name = quote(package.name, safe="") + for endpoint_base in [ + f"/orgs/{owner}/packages/container/{pkg_name}/versions", + f"/users/{owner}/packages/container/{pkg_name}/versions", + ]: + try: + for item in self.paginate(endpoint_base): + yield PackageVersion.model_validate(item) + return # Successfully fetched versions + except requests.HTTPError as e: + if e.response is not None and e.response.status_code == 404: + continue # Try next endpoint + raise + + def get_package_assets(self) -> Iterator[GHPackageAsset]: + log.info("Fetching packages newer than %s", self.since) + if self.until is not None: + log.info("Skipping packages newer than %s", self.until) + for pkg in self.get_packages(): + # Filter packages based on package_spec + if self.package_spec and not self.package_spec.match(pkg.name): + log.debug("Skipping package %s (filtered out)", pkg.name) + continue + log.info("Found package %s", pkg.name) + for version in self.get_package_versions(pkg): + ts = version.updated_at + if ts <= self.since or (self.until is not None and ts > self.until): + continue + self.register_build(ts, True) + tags = version.metadata.container.tags + tags_str = ", ".join(tags) if tags else "(no tags)" + log.info( + "Found package version %s (tags: %s) for %s", + version.name, + tags_str, + pkg.name, + ) + yield GHPackageAsset( + client=self.client, + updated_at=ts, + 
+                    package_name=pkg.name,
+                    package_type=pkg.package_type,
+                    version_id=version.id,
+                    version_name=version.name,
+                    tags=tags,
+                    url=version.url,
+                    html_url=version.html_url,
+                    description=version.description,
+                )
+
 
 class GHAAsset(BuildAsset):
     workflow_name: str
@@ -497,3 +572,366 @@ class Release(BaseModel):
     created_at: datetime
     published_at: Optional[datetime] = None
     assets: List[ReleaseAsset]
+
+
+class ContainerMetadata(BaseModel):
+    tags: List[str]
+
+
+class PackageMetadata(BaseModel):
+    container: ContainerMetadata
+
+
+class PackageVersion(BaseModel):
+    id: int
+    name: str
+    url: Optional[str] = None
+    package_html_url: Optional[str] = None
+    created_at: datetime
+    updated_at: datetime
+    html_url: Optional[str] = None
+    metadata: PackageMetadata
+    # Additional fields that may be present
+    description: Optional[str] = None
+
+
+class Package(BaseModel):
+    id: int
+    name: str
+    package_type: str
+    created_at: datetime
+    updated_at: datetime
+
+
+# The `arbitrary_types_allowed` is for APIClient
+class GHPackageAsset(BaseModel, arbitrary_types_allowed=True):
+    client: APIClient
+    updated_at: datetime
+    package_name: str
+    package_type: str
+    version_id: int
+    version_name: str
+    tags: List[str]
+    url: Optional[str] = None
+    html_url: Optional[str] = None
+    description: Optional[str] = None
+
+    def path_fields(self) -> dict[str, Any]:
+        utc_date = self.updated_at.astimezone(timezone.utc)
+        # Use the first tag as the primary tag, or version_name if no tags
+        primary_tag = self.tags[0] if self.tags else self.version_name
+        return {
+            "timestamp": utc_date,
+            "timestamp_local": self.updated_at.astimezone(),
+            "year": utc_date.strftime("%Y"),
+            "month": utc_date.strftime("%m"),
+            "day": utc_date.strftime("%d"),
+            "hour": utc_date.strftime("%H"),
+            "minute": utc_date.strftime("%M"),
+            "second": utc_date.strftime("%S"),
+            "ci": "github",
+            "type": "package",
+            "package_name": sanitize_pathname(self.package_name),
+            "package_type": self.package_type,
+            "version_id": str(self.version_id),
+            "version_name": sanitize_pathname(self.version_name),
+            "tag": sanitize_pathname(primary_tag),
+            "tags": ",".join(sanitize_pathname(t) for t in self.tags),
+        }
+
+    def expand_path(self, path_template: str, variables: dict[str, str]) -> str:
+        return expand_template(path_template, self.path_fields(), variables)
+
+    def download(self, path: Path) -> list[Path]:
+        # For packages (containers), we download metadata, manifest, and layers
+        path.mkdir(parents=True, exist_ok=True)
+
+        metadata_file = path / "metadata.json"
+        oci_complete_marker = path / ".oci_complete"
+
+        if oci_complete_marker.exists():
+            log.info(
+                "Package %s version %s already downloaded to %s; skipping",
+                self.package_name,
+                self.version_name,
+                path,
+            )
+            return []
+
+        downloaded_files: list[Path] = []
+
+        # Save basic metadata
+        if not metadata_file.exists():
+            log.info(
+                "Saving metadata for package %s version %s to %s",
+                self.package_name,
+                self.version_name,
+                metadata_file,
+            )
+            metadata = {
+                "package_name": self.package_name,
+                "package_type": self.package_type,
+                "version_id": self.version_id,
+                "version_name": self.version_name,
+                "tags": self.tags,
+                "updated_at": self.updated_at.isoformat(),
+                "url": self.url,
+                "html_url": self.html_url,
+                "description": self.description,
+            }
+            with metadata_file.open("w", encoding="utf-8") as fp:
+                json.dump(metadata, fp, indent=2)
+            downloaded_files.append(metadata_file)
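+        # The resulting metadata.json looks like this (values are
+        # illustrative, not taken from a real package version):
+        #   {"package_name": "tinuous-inception",
+        #    "package_type": "container",
+        #    "version_id": 123456789,
+        #    "version_name": "sha256:0123abcd...",
+        #    "tags": ["latest"],
+        #    "updated_at": "2023-05-07T12:34:56+00:00", ...}
+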
"Downloading container image for package %s version %s", + self.package_name, + self.version_name, + ) + oci_files = self._download_container_image(path) + downloaded_files.extend(oci_files) + + # Mark as complete + oci_complete_marker.touch() + downloaded_files.append(oci_complete_marker) + + log.info("Container image downloaded to OCI layout at %s", path) + except Exception as e: + log.warning( + "Failed to download container image for %s: %s", + self.package_name, + str(e), + ) + + return downloaded_files + + def _download_container_image(self, base_path: Path) -> list[Path]: + """ + Download the full container image in OCI layout format. + + This creates a directory structure compatible with podman/skopeo: + - blobs/sha256/ - Layer and config blobs + - index.json - OCI index pointing to manifest + - oci-layout - OCI layout version file + """ + downloaded = [] + + # Get owner for GHCR access + owner = self._get_owner() + if not owner: + log.warning("Could not determine owner for container download") + return [] + + # Create OCI layout structure + blobs_dir = base_path / "blobs" / "sha256" + blobs_dir.mkdir(parents=True, exist_ok=True) + + # Write OCI layout version + oci_layout_file = base_path / "oci-layout" + if not oci_layout_file.exists(): + with oci_layout_file.open("w") as f: + json.dump({"imageLayoutVersion": "1.0.0"}, f) + downloaded.append(oci_layout_file) + + # Download manifest + manifest = self._download_container_manifest() + if not manifest: + return downloaded + + # Determine manifest digest + manifest_bytes = json.dumps( + manifest, separators=(',', ':'), sort_keys=True + ).encode('utf-8') + manifest_digest = hashlib.sha256(manifest_bytes).hexdigest() + + # Save manifest as blob + manifest_blob = blobs_dir / manifest_digest + if not manifest_blob.exists(): + with manifest_blob.open("wb") as f: + f.write(manifest_bytes) + downloaded.append(manifest_blob) + + # Download config blob if present + if "config" in manifest: + config_digest = manifest["config"]["digest"].split(":")[-1] + config_blob = blobs_dir / config_digest + if not config_blob.exists(): + log.info("Downloading config blob %s", config_digest[:12]) + self._download_blob(owner, config_digest, config_blob) + downloaded.append(config_blob) + + # Download layer blobs + if "layers" in manifest: + for i, layer in enumerate(manifest["layers"]): + layer_digest = layer["digest"].split(":")[-1] + layer_blob = blobs_dir / layer_digest + if not layer_blob.exists(): + log.info( + "Downloading layer %d/%d: %s", + i + 1, + len(manifest["layers"]), + layer_digest[:12], + ) + self._download_blob( + owner, layer_digest, layer_blob + ) + downloaded.append(layer_blob) + + # Handle multi-platform manifests + if "manifests" in manifest: + log.info("Multi-platform manifest detected") + for sub_manifest in manifest["manifests"]: + sub_digest = sub_manifest["digest"].split(":")[-1] + sub_blob = blobs_dir / sub_digest + if not sub_blob.exists(): + # Download sub-manifest + log.info("Downloading sub-manifest %s", sub_digest[:12]) + self._download_blob( + owner, sub_digest, sub_blob + ) + downloaded.append(sub_blob) + + # Parse and download layers from sub-manifest + with sub_blob.open("rb") as f: + sub_man = json.load(f) + + if "config" in sub_man: + config_digest = sub_man["config"]["digest"].split(":")[-1] + config_blob = blobs_dir / config_digest + if not config_blob.exists(): + self._download_blob( + owner, config_digest, config_blob + ) + downloaded.append(config_blob) + + if "layers" in sub_man: + for layer in sub_man["layers"]: + 
+        # Create index.json pointing to the manifest
+        index_file = base_path / "index.json"
+        if not index_file.exists():
+            manifest_entry: dict[str, Any] = {
+                "mediaType": manifest.get(
+                    "mediaType", "application/vnd.oci.image.manifest.v1+json"
+                ),
+                "digest": f"sha256:{manifest_digest}",
+                "size": len(manifest_bytes),
+            }
+            if self.tags:
+                manifest_entry["annotations"] = {
+                    "org.opencontainers.image.ref.name": self.tags[0]
+                }
+
+            index_data: dict[str, Any] = {
+                "schemaVersion": 2,
+                "manifests": [manifest_entry],
+            }
+
+            with index_file.open("w") as f:
+                json.dump(index_data, f, indent=2)
+            downloaded.append(index_file)
+
+        return downloaded
+
+    def _get_owner(self) -> Optional[str]:
+        """Extract the owner from the API URL or the package name."""
+        # Try the URL first; its format is
+        # https://api.github.com/{orgs,users}/{owner}/packages/container/
+        # {package}/versions/{id}
+        if self.url:
+            match = re.search(r"/(orgs|users)/([^/]+)/", self.url)
+            if match:
+                return match.group(2)
+
+        # Fall back to the package name if it contains a slash
+        if "/" in self.package_name:
+            return self.package_name.split("/")[0]
+
+        return None
+
+    def _download_blob(self, owner: str, digest: str, target: Path) -> None:
+        """Download a config or layer blob from GHCR."""
+        blob_url = (
+            f"https://ghcr.io/v2/{owner}/{self.package_name}/"
+            f"blobs/sha256:{digest}"
+        )
+
+        try:
+            r = self.client.get(blob_url, stream=True)
+            r.raise_for_status()
+
+            with target.open("wb") as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        except Exception as e:
+            log.error("Failed to download blob %s: %s", digest[:12], str(e))
+            raise
+
+    def _download_manifest_blob(self, owner: str, digest: str, target: Path) -> None:
+        """
+        Download a (sub-)manifest by digest and save it verbatim.
+
+        Unlike layers and configs, manifests are served from the
+        ``manifests`` endpoint rather than the ``blobs`` endpoint.
+        """
+        manifest_url = (
+            f"https://ghcr.io/v2/{owner}/{self.package_name}/"
+            f"manifests/sha256:{digest}"
+        )
+        headers = {
+            "Accept": (
+                "application/vnd.oci.image.manifest.v1+json, "
+                "application/vnd.docker.distribution.manifest.v2+json"
+            )
+        }
+        r = self.client.get(manifest_url, headers=headers)
+        r.raise_for_status()
+        with target.open("wb") as f:
+            f.write(r.content)
+
+    def _download_container_manifest(self) -> Optional[bytes]:
+        """
+        Download the OCI/Docker manifest for a container package.
+
+        Returns the raw manifest bytes so that the caller can compute
+        the canonical digest over them.
+        """
+        # GHCR image references look like ghcr.io/OWNER/PACKAGE:TAG (or
+        # @DIGEST); the corresponding registry-API manifest endpoint is
+        # https://ghcr.io/v2/OWNER/PACKAGE/manifests/REFERENCE
+        owner = self._get_owner()
+        if not owner:
+            log.warning("Could not determine owner for manifest download")
+            return None
+
+        # Use the first tag, or fall back to the version_name (digest)
+        reference = self.tags[0] if self.tags else self.version_name
+
+        base_url = f"https://ghcr.io/v2/{owner}/{self.package_name}"
+        manifest_url = f"{base_url}/manifests/{reference}"
+
+        try:
+            # Request the manifest with Accept headers for both OCI and
+            # Docker manifest types
+            accept_header = (
+                "application/vnd.oci.image.manifest.v1+json, "
+                "application/vnd.docker.distribution.manifest.v2+json, "
+                "application/vnd.docker.distribution.manifest.list.v2+json"
+            )
+            headers = {"Accept": accept_header}
+
+            # Use the GitHub token for authentication with GHCR
+            # (GHCR accepts the GitHub token as a bearer token)
+            r = self.client.get(manifest_url, headers=headers)
+            r.raise_for_status()
+            return r.content
+        except Exception as e:
+            log.debug(
+                "Failed to fetch manifest from %s: %s",
+                manifest_url,
+                str(e),
+            )
+            return None
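For orientation, a successful container download produces a directory shaped
roughly like this (names illustrative; the digests are whatever the registry
serves):

    2023/05/github/packages/tinuous-inception/latest/
    ├── metadata.json
    ├── oci-layout
    ├── index.json
    ├── .oci_complete
    └── blobs/
        └── sha256/
            ├── <manifest digest>
            ├── <config digest>
            └── <layer digests>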
"Failed to fetch manifest from %s: %s", + manifest_url, + str(e), + ) + return None diff --git a/test/data/test_packages.yaml b/test/data/test_packages.yaml new file mode 100644 index 0000000..3fdeff3 --- /dev/null +++ b/test/data/test_packages.yaml @@ -0,0 +1,8 @@ +repo: con/tinuous-inception +ci: + github: + paths: + packages: '{year}/{month}/{package_name}/{tag}/' + packages: + - tinuous-inception +since: 2026-01-01T00:00:00Z diff --git a/test/test_config.py b/test/test_config.py index 28f1e10..8279445 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -135,7 +135,63 @@ ), ), ), + ( + { + "paths": { + "packages": "{year}/{package_name}/{tag}/", + }, + }, + GitHubConfig( + paths=GHPathsDict(packages="{year}/{package_name}/{tag}/"), + workflows=GHWorkflowSpec( + regex=False, include=[re.compile(r".*")], exclude=[] + ), + ), + ), ], ) def test_parse_github_config(data: dict[str, Any], cfg: GitHubConfig) -> None: assert GitHubConfig.model_validate(data) == cfg + + +def test_ghpathsdict_gets_packages() -> None: + """Test gets_packages method for GHPathsDict.""" + paths_without_packages = GHPathsDict(logs="logs/") + assert not paths_without_packages.gets_packages() + + paths_with_packages = GHPathsDict(packages="{year}/{package_name}/") + assert paths_with_packages.gets_packages() + + +def test_package_filtering() -> None: + """Test package filtering with include/exclude.""" + # Test with list of packages (converted to include) + data = { + "paths": {"packages": "{year}/{package_name}/"}, + "packages": ["tinuous-inception", "nwb2bids"], + } + cfg = GitHubConfig.model_validate(data) + assert cfg.packages.match("tinuous-inception") + assert cfg.packages.match("nwb2bids") + assert not cfg.packages.match("other-package") + + # Test with explicit include/exclude + data = { + "paths": {"packages": "{year}/{package_name}/"}, + "packages": { + "include": ["tinuous-.*"], + "exclude": [".*-test"], + "regex": True, + }, + } + cfg = GitHubConfig.model_validate(data) + assert cfg.packages.match("tinuous-inception") + assert cfg.packages.match("tinuous-prod") + assert not cfg.packages.match("tinuous-test") + assert not cfg.packages.match("other-package") + + # Test default (include all) + data = {"paths": {"packages": "{year}/{package_name}/"}} + cfg = GitHubConfig.model_validate(data) + assert cfg.packages.match("any-package") + assert cfg.packages.match("tinuous-inception") diff --git a/test/test_packages_integration.py b/test/test_packages_integration.py new file mode 100644 index 0000000..76899cd --- /dev/null +++ b/test/test_packages_integration.py @@ -0,0 +1,193 @@ +""" +Integration tests for GitHub Packages support. + +These tests may require: +- GitHub token (GH_TOKEN or GITHUB_TOKEN environment variable) +- Network access to GitHub API and GHCR +- podman (for full container verification) + +Run with: pytest test/test_packages_integration.py -v --integration +""" +from __future__ import annotations + +import json +import logging +import os +from pathlib import Path +import tempfile + +import pytest + +from tinuous.config import Config + +log = logging.getLogger(__name__) + + +@pytest.mark.integration +@pytest.mark.skipif( + not os.environ.get("GH_TOKEN") and not os.environ.get("GITHUB_TOKEN"), + reason="Requires GitHub token" +) +def test_fetch_tinuous_inception_package() -> None: + """ + Integration test that fetches the tinuous-inception package. + + This test verifies: + 1. Package filtering works (includes tinuous-inception) + 2. Metadata and manifest are downloaded + 3. 
diff --git a/test/test_packages_integration.py b/test/test_packages_integration.py
new file mode 100644
index 0000000..76899cd
--- /dev/null
+++ b/test/test_packages_integration.py
@@ -0,0 +1,193 @@
+"""
+Integration tests for GitHub Packages support.
+
+These tests may require:
+
+- a GitHub token (GH_TOKEN or GITHUB_TOKEN environment variable)
+- network access to the GitHub API and GHCR
+- podman (for full container verification)
+
+Run with: pytest test/test_packages_integration.py -v -m integration
+"""
+from __future__ import annotations
+
+import json
+import logging
+import os
+from pathlib import Path
+import shutil
+import subprocess
+import tempfile
+
+import pytest
+from yaml import safe_load
+
+from tinuous.config import Config
+
+log = logging.getLogger(__name__)
+
+
+@pytest.mark.integration
+@pytest.mark.skipif(
+    not os.environ.get("GH_TOKEN") and not os.environ.get("GITHUB_TOKEN"),
+    reason="Requires GitHub token",
+)
+def test_fetch_tinuous_inception_package(monkeypatch: pytest.MonkeyPatch) -> None:
+    """
+    Integration test that fetches the tinuous-inception package.
+
+    This test verifies that:
+
+    1. package filtering works (includes tinuous-inception)
+    2. the metadata and manifest are downloaded
+    3. the manifest contains the expected OCI fields
+    """
+    config_path = Path(__file__).parent / "data" / "test_packages.yaml"
+    with open(config_path) as fp:
+        cfg = Config.model_validate(safe_load(fp))
+
+    # Verify the config is correct
+    assert cfg.repo == "con/tinuous-inception"
+    ghcfg = cfg.ci.github
+    assert ghcfg is not None
+    assert ghcfg.gets_packages()
+
+    # Verify package filtering
+    assert ghcfg.packages.match("tinuous-inception")
+    assert not ghcfg.packages.match("other-package")
+
+    # Create a temporary directory for downloads
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmppath = Path(tmpdir)
+        # monkeypatch.chdir() restores the working directory on teardown
+        monkeypatch.chdir(tmppath)
+
+        # Fetch tokens
+        tokens = ghcfg.get_auth_tokens()
+
+        # Create the GitHub Actions client
+        ci = ghcfg.get_system(
+            repo=cfg.repo,
+            since=cfg.since or cfg.get_since(None),
+            until=cfg.until,
+            tokens=tokens,
+        )
+
+        # Fetch package assets
+        packages_found = 0
+        for pkg_asset in ci.get_package_assets():
+            packages_found += 1
+
+            # Verify it's the expected package
+            assert pkg_asset.package_name == "tinuous-inception"
+            assert pkg_asset.package_type == "container"
+
+            # Download the package
+            path_template = ghcfg.paths.packages
+            assert path_template is not None
+            path_str = pkg_asset.expand_path(path_template, cfg.vars)
+            path = Path(path_str)
+
+            pkg_asset.download(path)
+
+            # Verify the metadata file exists and has the expected content
+            metadata_file = path / "metadata.json"
+            assert metadata_file.exists()
+
+            with open(metadata_file) as f:
+                metadata = json.load(f)
+
+            assert metadata["package_name"] == "tinuous-inception"
+            assert metadata["package_type"] == "container"
+            assert "tags" in metadata
+            assert "version_id" in metadata
+            assert "updated_at" in metadata
+
+            # Verify the OCI layout structure
+            oci_layout = path / "oci-layout"
+            index_json = path / "index.json"
+            blobs_dir = path / "blobs" / "sha256"
+
+            assert oci_layout.exists(), "OCI layout file should exist"
+            assert index_json.exists(), "index.json should exist"
+            assert blobs_dir.exists(), "blobs/sha256 directory should exist"
+
+            # Verify the OCI layout version
+            with open(oci_layout) as f:
+                layout = json.load(f)
+            assert layout["imageLayoutVersion"] == "1.0.0"
+
+            # Verify the index.json structure
+            with open(index_json) as f:
+                index = json.load(f)
+            assert index["schemaVersion"] == 2
+            assert "manifests" in index
+            assert len(index["manifests"]) > 0
+
+            # Verify that blobs exist
+            blobs = list(blobs_dir.glob("*"))
+            assert len(blobs) > 0, "Should have downloaded some blobs"
+            log.info("Downloaded %d blobs", len(blobs))
+
+            # Try to run the image with podman if available
+            if shutil.which("podman"):
+                log.info("Testing image with podman")
+                try:
+                    # Run the container from the OCI layout
+                    result = subprocess.run(
+                        ["podman", "run", f"oci:{path}"],
+                        capture_output=True,
+                        text=True,
+                        timeout=30,
+                    )
+                    log.info("Podman stdout: %s", result.stdout)
+                    log.info("Podman stderr: %s", result.stderr)
+
+                    # Verify we got output
+                    assert result.returncode == 0, (
+                        f"Podman run failed: {result.stderr}"
+                    )
+                    assert "Built at:" in result.stdout, (
+                        "Expected 'Built at:' in output"
+                    )
+                    log.info("✓ Successfully ran container with podman")
+                except subprocess.TimeoutExpired:
+                    log.warning("Podman run timed out")
+                except Exception as e:
+                    log.warning("Podman test failed: %s", str(e))
+            else:
+                log.info("Podman not available, skipping runtime test")
+
+            # Only check the first package version found
+            break
+
+        assert packages_found > 0, "Should have found at least one package"
+
+
+def test_package_filtering_config() -> None:
+    """
+    Unit test for the package filtering configuration; needs no network
+    access.
+    """
+    config_yaml = """
+repo: con/tinuous-inception
+ci:
+  github:
+    paths:
+      packages: '{year}/{package_name}/{tag}/'
+    packages:
+      include:
+        - tinuous-.*
+      exclude:
+        - .*-test
+      regex: true
+"""
+    cfg = Config.model_validate(safe_load(config_yaml))
+    ghcfg = cfg.ci.github
+    assert ghcfg is not None
+
+    # Test filtering
+    assert ghcfg.packages.match("tinuous-inception")
+    assert ghcfg.packages.match("tinuous-prod")
+    assert not ghcfg.packages.match("tinuous-test")
+    assert not ghcfg.packages.match("other-package")
diff --git a/tox.ini b/tox.ini
index eab20b7..9d9f074 100644
--- a/tox.ini
+++ b/tox.ini
@@ -36,6 +36,8 @@ commands =
 
 [pytest]
 addopts = --cov=tinuous --no-cov-on-fail
+markers =
+    integration: marks tests as integration tests (may require network and credentials)
 filterwarnings =
     error
#
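Finally, a downloaded layout can be sanity-checked outside the test suite
with a short script like the following (the path is invented; every blob's
file name must equal the SHA-256 of its content, and index.json must be a
valid OCI index):

    import hashlib
    import json
    from pathlib import Path

    # Point this at any directory produced by the packages download logic
    layout = Path("2023/05/github/packages/tinuous-inception/latest")

    for blob in (layout / "blobs" / "sha256").iterdir():
        digest = hashlib.sha256(blob.read_bytes()).hexdigest()
        assert digest == blob.name, f"corrupt blob: {blob.name}"

    index = json.loads((layout / "index.json").read_text())
    assert index["schemaVersion"] == 2
    print(f"verified {len(index['manifests'])} manifest entry(ies)")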