88for sales agents to verify they are authorized for specific properties.
99"""
1010
11+ import ipaddress
1112from typing import Any
1213from urllib .parse import urlparse
1314
@@ -89,6 +90,38 @@ def _validate_publisher_domain(domain: str) -> str:
8990 return domain
9091
9192
93+ def _validate_redirect_url (url : str ) -> None :
94+ """Validate an authoritative_location URL is safe to follow.
95+
96+ Rejects private/reserved IP addresses and localhost to prevent SSRF attacks
97+ where a malicious publisher redirects the SDK to internal services.
98+
99+ Args:
100+ url: The HTTPS URL to validate
101+
102+ Raises:
103+ AdagentsValidationError: If the URL targets a private/reserved address
104+ """
105+ parsed = urlparse (url )
106+ hostname = parsed .hostname or ""
107+
108+ # Reject localhost by name
109+ if hostname in ("localhost" , "localhost.localdomain" ) or hostname .endswith (".local" ):
110+ raise AdagentsValidationError (
111+ "authoritative_location must not target localhost"
112+ )
113+
114+ # Reject private/reserved IP addresses
115+ try :
116+ ip = ipaddress .ip_address (hostname )
117+ if ip .is_private or ip .is_loopback or ip .is_link_local or ip .is_reserved :
118+ raise AdagentsValidationError (
119+ "authoritative_location must not target private/reserved addresses"
120+ )
121+ except ValueError :
122+ pass # Not an IP literal — hostname is fine
123+
124+
92125def normalize_url (url : str ) -> str :
93126 """Normalize URL by removing protocol and trailing slash.
94127
@@ -327,13 +360,21 @@ async def fetch_adagents(
327360 # Track visited URLs to detect loops
328361 visited_urls : set [str ] = set ()
329362
363+ is_redirect = False
364+
330365 for depth in range (MAX_REDIRECT_DEPTH + 1 ):
331366 # Check for redirect loop
332367 if url in visited_urls :
333- raise AdagentsValidationError (f"Circular redirect detected: { url } already visited" )
368+ raise AdagentsValidationError (
369+ "Circular redirect detected in authoritative_location chain"
370+ )
334371 visited_urls .add (url )
335372
336- data = await _fetch_adagents_url (url , timeout , user_agent , client )
373+ # Use the caller's client for the initial fetch only. Redirect targets
374+ # use a fresh client to avoid leaking credentials to third-party URLs.
375+ fetch_client = None if is_redirect else client
376+
377+ data = await _fetch_adagents_url (url , timeout , user_agent , fetch_client )
337378
338379 # Check if this is a redirect. A response with authoritative_location but no
339380 # authorized_agents indicates a redirect. If both are present, authorized_agents
@@ -349,6 +390,9 @@ async def fetch_adagents(
349390 f"authoritative_location must be an HTTPS URL, got: { authoritative_url !r} "
350391 )
351392
393+ # Validate the redirect target is not a private/reserved address
394+ _validate_redirect_url (authoritative_url )
395+
352396 # Check if we've exceeded max depth
353397 if depth >= MAX_REDIRECT_DEPTH :
354398 raise AdagentsValidationError (
@@ -357,6 +401,7 @@ async def fetch_adagents(
357401
358402 # Follow the redirect
359403 url = authoritative_url
404+ is_redirect = True
360405 continue
361406
362407 # We have the final data with authorized_agents (or both fields present,
@@ -476,9 +521,65 @@ async def verify_agent_for_property(
476521 )
477522
478523
524+ def _resolve_agent_properties (
525+ agent : dict [str , Any ],
526+ top_level_properties : list [dict [str , Any ]],
527+ ) -> list [dict [str , Any ]]:
528+ """Resolve properties for a single agent entry based on its authorization_type.
529+
530+ Args:
531+ agent: An authorized_agents entry
532+ top_level_properties: The top-level properties array from adagents.json
533+
534+ Returns:
535+ List of resolved property dicts for this agent
536+ """
537+ authorization_type = agent .get ("authorization_type" , "" )
538+
539+ # Handle inline_properties, or legacy entries with properties array but no authorization_type
540+ if authorization_type == "inline_properties" or (
541+ not authorization_type and "properties" in agent
542+ ):
543+ properties = agent .get ("properties" , [])
544+ if not isinstance (properties , list ):
545+ return []
546+ return [p for p in properties if isinstance (p , dict )]
547+
548+ # Handle property_ids (filter top-level properties by property_id)
549+ if authorization_type == "property_ids" :
550+ authorized_ids = set (agent .get ("property_ids" , []))
551+ return [
552+ p
553+ for p in top_level_properties
554+ if isinstance (p , dict ) and p .get ("property_id" ) in authorized_ids
555+ ]
556+
557+ # Handle property_tags (filter top-level properties by tags)
558+ if authorization_type == "property_tags" :
559+ authorized_tags = {t for t in agent .get ("property_tags" , []) if isinstance (t , str )}
560+ return [
561+ p
562+ for p in top_level_properties
563+ if isinstance (p , dict )
564+ and {t for t in p .get ("tags" , []) if isinstance (t , str )} & authorized_tags
565+ ]
566+
567+ # Handle publisher_properties (cross-domain references)
568+ if authorization_type == "publisher_properties" :
569+ publisher_props = agent .get ("publisher_properties" , [])
570+ if not isinstance (publisher_props , list ):
571+ return []
572+ return [p for p in publisher_props if isinstance (p , dict )]
573+
574+ return []
575+
576+
479577def get_all_properties (adagents_data : dict [str , Any ]) -> list [dict [str , Any ]]:
480578 """Extract all properties from adagents.json data.
481579
580+ Handles all authorization types: inline_properties, property_ids,
581+ property_tags, and publisher_properties.
582+
482583 Args:
483584 adagents_data: Parsed adagents.json data
484585
@@ -495,6 +596,10 @@ def get_all_properties(adagents_data: dict[str, Any]) -> list[dict[str, Any]]:
495596 if not isinstance (authorized_agents , list ):
496597 raise AdagentsValidationError ("adagents.json must have 'authorized_agents' array" )
497598
599+ top_level_properties = adagents_data .get ("properties" , [])
600+ if not isinstance (top_level_properties , list ):
601+ top_level_properties = []
602+
498603 properties = []
499604 for agent in authorized_agents :
500605 if not isinstance (agent , dict ):
@@ -504,16 +609,11 @@ def get_all_properties(adagents_data: dict[str, Any]) -> list[dict[str, Any]]:
504609 if not agent_url :
505610 continue
506611
507- agent_properties = agent .get ("properties" , [])
508- if not isinstance (agent_properties , list ):
509- continue
612+ agent_properties = _resolve_agent_properties (agent , top_level_properties )
510613
511- # Add each property with the agent URL for reference
512614 for prop in agent_properties :
513- if isinstance (prop , dict ):
514- # Create a copy and add agent_url
515- prop_with_agent = {** prop , "agent_url" : agent_url }
516- properties .append (prop_with_agent )
615+ prop_with_agent = {** prop , "agent_url" : agent_url }
616+ properties .append (prop_with_agent )
517617
518618 return properties
519619
@@ -570,12 +670,10 @@ def get_properties_by_agent(adagents_data: dict[str, Any], agent_url: str) -> li
570670 if not isinstance (authorized_agents , list ):
571671 raise AdagentsValidationError ("adagents.json must have 'authorized_agents' array" )
572672
573- # Get top-level properties for reference-based authorization types
574673 top_level_properties = adagents_data .get ("properties" , [])
575674 if not isinstance (top_level_properties , list ):
576675 top_level_properties = []
577676
578- # Normalize the agent URL for comparison
579677 normalized_agent_url = normalize_url (agent_url )
580678
581679 for agent in authorized_agents :
@@ -586,48 +684,10 @@ def get_properties_by_agent(adagents_data: dict[str, Any], agent_url: str) -> li
586684 if not agent_url_from_json :
587685 continue
588686
589- # Match agent URL (protocol-agnostic)
590687 if normalize_url (agent_url_from_json ) != normalized_agent_url :
591688 continue
592689
593- # Found the agent - determine authorization type
594- authorization_type = agent .get ("authorization_type" , "" )
595-
596- # Handle inline_properties (properties array directly on agent)
597- if authorization_type == "inline_properties" or "properties" in agent :
598- properties = agent .get ("properties" , [])
599- if not isinstance (properties , list ):
600- return []
601- return [p for p in properties if isinstance (p , dict )]
602-
603- # Handle property_ids (filter top-level properties by property_id)
604- if authorization_type == "property_ids" :
605- authorized_ids = set (agent .get ("property_ids" , []))
606- return [
607- p
608- for p in top_level_properties
609- if isinstance (p , dict ) and p .get ("property_id" ) in authorized_ids
610- ]
611-
612- # Handle property_tags (filter top-level properties by tags)
613- if authorization_type == "property_tags" :
614- authorized_tags = set (agent .get ("property_tags" , []))
615- return [
616- p
617- for p in top_level_properties
618- if isinstance (p , dict ) and set (p .get ("tags" , [])) & authorized_tags
619- ]
620-
621- # Handle publisher_properties (cross-domain references)
622- # Returns the selector objects; caller must resolve against other domains
623- if authorization_type == "publisher_properties" :
624- publisher_props = agent .get ("publisher_properties" , [])
625- if not isinstance (publisher_props , list ):
626- return []
627- return [p for p in publisher_props if isinstance (p , dict )]
628-
629- # No recognized authorization type - return empty
630- return []
690+ return _resolve_agent_properties (agent , top_level_properties )
631691
632692 return []
633693
0 commit comments