twarc/expansions.py at main · DocNow/twarc · GitHub


      Skip to content
      
    
                Sign up
              
              
                Sign up
              

                    Why GitHub?
                    
                      
                    Features →
                    	Mobile →
	Actions →
	Codespaces →
	Packages →
	Security →
	Code review →
	Project management →
	Integrations →


                    	GitHub Sponsors →
	Customer stories→


                Team
              
	
                Enterprise
              
	
                    Explore
                    
                      
                    	Explore GitHub →


                    Learn and contribute

                    	Topics →
	Collections →
	Trending →
	Learning Lab →
	Open source guides →


                    Connect with others

                    	The ReadME Project →
	Events →
	Community forum →
	GitHub Education →
	GitHub Stars program →


                Marketplace
              
	
                    Pricing
                    
                       
                    Plans →

                    	Compare plans →
	Contact Sales →


                    	Education →


        In this repository
      
      
        All GitHub
      
      ↵
    

      Jump to
      ↵
    

    No suggested jump to results
  

        In this repository
      
      
        All GitHub
      
      ↵
    

      Jump to
      ↵
    

        In this organization
      
      
        All GitHub
      
      ↵
    

      Jump to
      ↵
    

        In this repository
      
      
        All GitHub
      
      ↵
    

      Jump to
      ↵
    

          Sign in
        
            
              Sign up
            
            
              Sign up
            
      
      {{ message }}


    DocNow
  
  /
  
    twarc
  
  
    Notifications

  
        Star

    
      1k
    

          Fork

      
        214
      
  
          Code
            

          Issues
            53

    
          Pull requests
            0

    
          Actions
            

          Projects
            0

    
          Wiki
            

          Security
            

          Insights
            

            More
          

                    Code
                
	
                    Issues
                
	
                    Pull requests
                
	
                    Actions
                
	
                    Projects
                
	
                    Wiki
                
	
                    Security
                
	
                    Insights
                

    Permalink

    
      main
      
    
      Switch branches/tags
      
    
          Branches
          Tags
        

    Nothing to show


    {{ refName }}
    default
  

              View all branches
          

              Nothing to show

            
    {{ refName }}
    default
  

              View all tags
          
        
        twarc/twarc/expansions.py
          /
  
      
    Jump to
    
  
        Code definitions
        
          
              extract_includes
              Function
            
              
              flatten
              Function
            
              
              expand_payload
              Function
        

          Code navigation index up-to-date
          
        
        Go to file
      

                Go to file
                T
            
	
                  Go to line
                  L
                
              
                  Go to definition
                  R
                
              
                Copy path
              
            
                  Copy permalink
                
              
          Cannot retrieve contributors at this time
        

      207 lines (177 sloc)
      
    6.18 KB
  

      Raw
        Blame
    

                  Open with Desktop
                
            
              View raw
            
          
                View blame
              
            
		"""
		This module contains a list of the known Twitter V2+ API expansions and fields
		for each expansion, and a function for "flattening" a result set, including all
		expansions inline
		
		"""
		

		from collections import defaultdict
		

		# Known expansion names. They are joined with commas to form the
		# "expansions" entry of EVERYTHING below.
		EXPANSIONS = [
		    "author_id",
		    "in_reply_to_user_id",
		    "referenced_tweets.id",
		    "referenced_tweets.id.author_id",
		    "entities.mentions.username",
		    "attachments.poll_ids",
		    "attachments.media_keys",
		    "geo.place_id",
		]
		

		# Known fields of a user object. Joined with commas to form the
		# "user.fields" entry of both EVERYTHING and USER_EVERYTHING.
		USER_FIELDS = [
		    "created_at",
		    "description",
		    "entities",
		    "id",
		    "location",
		    "name",
		    "pinned_tweet_id",
		    "profile_image_url",
		    "protected",
		    "public_metrics",
		    "url",
		    "username",
		    "verified",
		    "withheld",
		]
		

		# Known fields of a tweet object. Joined with commas to form the
		# "tweet.fields" entry of both EVERYTHING and USER_EVERYTHING.
		# The metrics marked "private" are deliberately left out of the list.
		TWEET_FIELDS = [
		    "attachments",
		    "author_id",
		    "context_annotations",
		    "conversation_id",
		    "created_at",
		    "entities",
		    "geo",
		    "id",
		    "in_reply_to_user_id",
		    "lang",
		    "public_metrics",
		    # "non_public_metrics", # private
		    # "organic_metrics", # private
		    # "promoted_metrics", # private
		    "text",
		    "possibly_sensitive",
		    "referenced_tweets",
		    "reply_settings",
		    "source",
		    "withheld",
		]
		

		# Known fields of a media object. Joined with commas to form the
		# "media.fields" entry of EVERYTHING.
		# The metrics marked "private" are deliberately left out of the list.
		MEDIA_FIELDS = [
		    "duration_ms",
		    "height",
		    "media_key",
		    "preview_image_url",
		    "type",
		    "url",
		    "width",
		    # "non_public_metrics", # private
		    # "organic_metrics", # private
		    # "promoted_metrics", # private
		    "public_metrics",
		]
		

		# Known fields of a poll object. Joined with commas to form the
		# "poll.fields" entry of EVERYTHING.
		POLL_FIELDS = ["duration_minutes", "end_datetime", "id", "options", "voting_status"]
		

		# Known fields of a place object. Joined with commas to form the
		# "place.fields" entry of EVERYTHING.
		PLACE_FIELDS = [
		    "contained_within",
		    "country",
		    "country_code",
		    "full_name",
		    "geo",
		    "id",
		    "name",
		    "place_type",
		]
		

		# Every known expansion and field list, each collapsed into a single
		# comma-separated string.
		# NOTE(review): the keys look like API query parameter names, so this is
		# presumably passed as request parameters -- confirm against the callers.
		EVERYTHING = {
		    "expansions": ",".join(EXPANSIONS),
		    "user.fields": ",".join(USER_FIELDS),
		    "tweet.fields": ",".join(TWEET_FIELDS),
		    "media.fields": ",".join(MEDIA_FIELDS),
		    "poll.fields": ",".join(POLL_FIELDS),
		    "place.fields": ",".join(PLACE_FIELDS),
		}
		

		# For endpoints focused on user objects such as looking up users and followers.
		# Not all of the expansions are available for these endpoints.
		# Only the pinned tweet is expanded here, so just the tweet and user field
		# lists are included, each as a comma-separated string.
		USER_EVERYTHING = {
		    "expansions": "pinned_tweet_id",
		    "tweet.fields": ",".join(TWEET_FIELDS),
		    "user.fields": ",".join(USER_FIELDS),
		}
		

		def extract_includes(response, expansion, _id="id"):
		    """
		    Index the objects listed under response["includes"][expansion].

		    Returns a defaultdict mapping each object's `_id` value to the object
		    itself. Looking up a key that is not present yields a new empty dict.
		    When the response has no "includes", or "includes" has no entry for
		    `expansion`, the returned index is empty.
		    """
		    index = defaultdict(dict)

		    # Nothing to index: hand back the empty lookup table.
		    if "includes" not in response or expansion not in response["includes"]:
		        return index

		    # Later duplicates overwrite earlier ones, as in a dict literal.
		    for included in response["includes"][expansion]:
		        index[included[_id]] = included
		    return index
		

		def flatten(response):
		    """
		    Flatten a response by inlining the objects in "includes" into the
		    objects that reference them.

		    Expects an entire page response from the API (data, includes, meta).
		    Existing values are kept; references are resolved as follows:

		    * author_id -> "author", in_reply_to_user_id -> "in_reply_to_user"
		    * media_keys -> "media", poll_ids -> "poll"
		    * pinned_tweet_id -> "pinned_tweet"
		    * geo.place_id, and the entries of "mentions" and "referenced_tweets",
		      are merged with the matching include.

		    Anything missing from includes expands to an empty dict. If the
		    response carries a "__twarc" key and "data" is a list, that value is
		    copied onto every item of "data".

		    Returns the same response object, which is modified in place.
		    """

		    # Users are looked up by id (author, reply target) and by username
		    # (mentions). The two indexes are kept apart: in a single merged dict a
		    # username that happens to equal another user's id would shadow it.
		    users_by_id = extract_includes(response, "users", "id")
		    users_by_username = extract_includes(response, "users", "username")
		    # Media is by media_key, not id
		    includes_media = extract_includes(response, "media", "media_key")
		    includes_polls = extract_includes(response, "polls")
		    includes_places = extract_includes(response, "places")
		    # Tweets in includes will themselves be expanded
		    includes_tweets = extract_includes(response, "tweets")

		    def expand_payload(payload):
		        """
		        Recursively step through an object and sub objects and append extra data.
		        Can be applied to any tweet, list of tweets, sub object of tweet etc.
		        """

		        # Expand list items individually:
		        if isinstance(payload, list):
		            return [expand_payload(item) for item in payload]
		        # Only dicts can hold references. Strings, numbers, booleans and
		        # None (JSON null) are returned untouched.
		        if not isinstance(payload, dict):
		            return payload

		        # Expand nested values first. Only existing keys are reassigned
		        # here, so the dict does not change size while it is iterated.
		        for key, value in payload.items():
		            payload[key] = expand_payload(value)

		        if "author_id" in payload:
		            payload["author"] = users_by_id[payload["author_id"]]

		        if "in_reply_to_user_id" in payload:
		            payload["in_reply_to_user"] = users_by_id[payload["in_reply_to_user_id"]]

		        if "media_keys" in payload:
		            payload["media"] = [
		                includes_media[media_key] for media_key in payload["media_keys"]
		            ]

		        if payload.get("poll_ids"):
		            poll_id = payload["poll_ids"][-1]  # only ever 1 poll per tweet.
		            payload["poll"] = includes_polls[poll_id]

		        geo = payload.get("geo")
		        if isinstance(geo, dict) and "place_id" in geo:
		            payload["geo"] = {**geo, **includes_places[geo["place_id"]]}

		        if "mentions" in payload:
		            payload["mentions"] = [
		                {**mention, **users_by_username[mention["username"]]}
		                for mention in payload["mentions"]
		            ]

		        if "referenced_tweets" in payload:
		            payload["referenced_tweets"] = [
		                {**referenced_tweet, **includes_tweets[referenced_tweet["id"]]}
		                for referenced_tweet in payload["referenced_tweets"]
		            ]

		        if "pinned_tweet_id" in payload:
		            payload["pinned_tweet"] = includes_tweets[payload["pinned_tweet_id"]]

		        return payload

		    # First, expand the included tweets, before processing actual result tweets.
		    # Iterate over a snapshot: a lookup of an id that is not included adds an
		    # empty entry to includes_tweets, which must not happen mid-iteration.
		    for included_id, included_tweet in list(includes_tweets.items()):
		        includes_tweets[included_id] = expand_payload(included_tweet)

		    # Now flatten the list of tweets or an individual tweet
		    if "data" in response:
		        response["data"] = expand_payload(response["data"])

		        # Add the __twarc metadata to each tweet if it's a result set
		        if "__twarc" in response and isinstance(response["data"], list):
		            for tweet in response["data"]:
		                tweet["__twarc"] = response["__twarc"]

		    return response


            Copy lines
          
        
            Copy permalink
          
        
	View git blame
	Reference in new issue


        Go

    
    	© 2021 GitHub, Inc.
	Terms
	Privacy
	Security
	Status
	Docs


    	Contact GitHub
	Pricing
	API
	Training
	Blog
	About


    You can’t perform that action at this time.
  

    You signed in with another tab or window. Reload to refresh your session.
    You signed out in another tab or window. Reload to refresh your session.