Files
2025-11-30 08:46:16 +08:00

100 lines
2.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Parse and validate Prow job URLs from gcsweb.
Extracts build_id, prowjob name, and GCS paths.
"""
import re
import sys
import json
from urllib.parse import urlparse
def parse_prowjob_url(url):
    """
    Parse a Prow job URL and extract relevant information.

    Everything after the first 'test-platform-results/' marker in the URL is
    treated as the path inside the bucket. The build ID is the first path
    segment that consists of at least 10 decimal digits and is preceded by a
    '/'; the prowjob name is the path segment immediately before it.

    Args:
        url: gcsweb URL containing test-platform-results

    Returns:
        dict with keys: bucket_path, build_id, prowjob_name, gcs_base_path,
        original_url

    Raises:
        ValueError: if URL format is invalid
    """
    marker = 'test-platform-results/'

    # Find test-platform-results in URL
    if marker not in url:
        raise ValueError(
            "URL must contain 'test-platform-results/' substring.\n"
            "Example: https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/"
            "test-platform-results/pr-logs/pull/30393/pull-ci-openshift-origin-main-okd-scos-e2e-aws-ovn/1978913325970362368/"
        )

    # Extract path after the FIRST marker only. Splitting on every occurrence
    # and taking element [1] would truncate the path if the marker text shows
    # up again later in the URL.
    bucket_path = url.partition(marker)[2]

    # Remove trailing slash if present
    bucket_path = bucket_path.rstrip('/')

    # Find build_id: at least 10 consecutive decimal digits delimited by /
    build_id_pattern = r'/(\d{10,})(?:/|$)'
    match = re.search(build_id_pattern, bucket_path)
    if not match:
        raise ValueError(
            "Could not find build ID (10+ decimal digits) in URL path.\n"
            f"Bucket path: {bucket_path}\n"
            "Expected pattern: /NNNNNNNNNN/ where N is a digit"
        )
    build_id = match.group(1)

    # Extract prowjob name: path segment immediately before build_id.
    # match.start() is the position of the '/' in front of the build ID, so
    # the text before it ends with the prowjob name. Using the match position
    # (instead of searching the segment list for the build ID value) keeps the
    # lookup correct when an earlier segment has the same digits.
    prowjob_name = bucket_path[:match.start()].rpartition('/')[2]
    if not prowjob_name:
        # Happens when nothing (or an empty segment, e.g. '//') precedes the
        # build ID; an empty job name is not usable by callers.
        path_segments = bucket_path.split('/')
        raise ValueError(
            "Could not extract prowjob name from path.\n"
            f"Build ID: {build_id}\n"
            f"Path segments: {path_segments}"
        )

    # Construct GCS base path
    gcs_base_path = f"gs://test-platform-results/{bucket_path}/"

    return {
        'bucket_path': bucket_path,
        'build_id': build_id,
        'prowjob_name': prowjob_name,
        'gcs_base_path': gcs_base_path,
        'original_url': url,
    }
def main():
    """
    Command-line entry point.

    Expects exactly one argument, the prowjob URL. On success the parsed
    fields are printed to stdout as indented JSON and 0 is returned. If the
    URL is rejected with ValueError, the message goes to stderr and 1 is
    returned. With a wrong argument count, a usage line is printed to stderr
    and the process exits with status 1.
    """
    argv = sys.argv
    if len(argv) == 2:
        try:
            parsed = parse_prowjob_url(argv[1])
            print(json.dumps(parsed, indent=2))
        except ValueError as err:
            print(f"Error: {err}", file=sys.stderr)
            return 1
        return 0

    # Anything other than exactly one positional argument is a usage error.
    print("Usage: parse_url.py <prowjob-url>", file=sys.stderr)
    sys.exit(1)
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())