File size: 6,214 Bytes
49e67a8
 
 
 
 
 
 
 
 
 
 
b1d46da
49e67a8
 
 
 
 
 
b1d46da
 
 
 
49e67a8
b1d46da
49e67a8
 
 
b1d46da
 
 
 
 
49e67a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4d6026
49e67a8
f4d6026
 
 
 
 
 
 
 
 
49e67a8
f4d6026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49e67a8
 
f4d6026
49e67a8
 
f4d6026
 
 
 
 
 
 
49e67a8
 
f4d6026
 
 
 
 
 
49e67a8
 
 
 
 
 
 
 
f4d6026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49e67a8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import requests
import json
from datetime import date, timedelta

from dotenv import load_dotenv
import os

# Load key/value pairs from a local .env file into the process environment
# so the API key never needs to be hard-coded in source.
load_dotenv()

# twitterapi.io API key; .strip() guards against stray whitespace/newlines
# accidentally saved in the .env file. Empty string if the variable is unset.
API_KEY = os.getenv("TWITTER_API_KEY", "").strip()


def search_tweets(query, query_type="Latest", limit=20):
    """
    Searches for tweets using the twitterapi.io advanced search endpoint.

    Args:
        query (str): Full advanced-search query string.
        query_type (str): Result ordering requested from the API
            (default: "Latest").
        limit (int): Maximum number of tweets to request (default: 20).

    Returns:
        dict | None: Parsed JSON response on success; None when the API key
        is missing, the request fails, or the server returns a non-200 status.
    """
    if not API_KEY:
        print("❌ Error: TWITTER_API_KEY not found or empty")
        return None

    url = "https://api.twitterapi.io/twitter/tweet/advanced_search"
    # API_KEY is already stripped at module load; no need to strip again here.
    headers = {"X-API-Key": API_KEY}
    params = {"query": query, "queryType": query_type, "limit": limit}

    print(f"πŸ” Executing search with query: {query}")
    try:
        # timeout prevents the call from hanging forever on a stalled
        # connection; RequestException covers all requests-level failures
        # (DNS, connect, timeout, etc.) without masking unrelated bugs.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"❌ Request error: {e}")
        return None

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

def extract_tweets(result_json):
    """
    Extracts a normalized list of tweets from the API result.
    Returns a list of dicts with keys: tweet_url, location, created_at, text, hashtags
    """
    # Nothing to do when the response is missing or has no 'tweets' key.
    if not result_json or 'tweets' not in result_json:
        return []

    normalized = []
    for item in result_json.get('tweets', []):
        author = item.get('author', {})
        entities = item.get('entities', {})
        normalized.append({
            'tweet_url': item.get('url'),
            'location': author.get('location', None),
            'created_at': item.get('createdAt'),
            'text': item.get('text'),
            'hashtags': [h['text'] for h in entities.get('hashtags', [])],
        })
    return normalized

def build_custom_query(hazard_type=None, location=None, days_back=1):
    """
    Builds a custom query based on provided hazard type and location.

    Args:
        hazard_type (str): Specific hazard type to search for
        location (str): Specific location to search for
        days_back (int): Number of days back to search (default: 1)

    Returns:
        str: Custom search query
    """
    def _or_group(terms):
        # Render a parenthesized OR-group of double-quoted terms.
        return "(" + " OR ".join(f'"{term}"' for term in terms) + ")"

    # Default hazard keywords
    default_hazards = [
        "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves", 
        "swell", "coastal flooding", "rip current", "coastal erosion", 
        "water discoloration", "algal bloom", "marine debris", "pollution"
    ]

    # Default location keywords
    default_locations = [
        "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa",
        "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea"
    ]

    # A caller-supplied term is quoted as-is; otherwise fall back to the
    # full OR-group of defaults.
    hazard_query = f'"{hazard_type}"' if hazard_type else _or_group(default_hazards)
    location_query = f'"{location}"' if location else _or_group(default_locations)

    # Restrict results to Indian languages plus English.
    allowed_languages = [
        "as", "bn", "brx", "doi", "gu", "hi", "kn", "ks", "kok", "ml", "mni", 
        "mr", "ne", "or", "pa", "sa", "sat", "sd", "ta", "te", "ur", "en", "bh"
    ]
    lang_query = "(" + " OR ".join(f"lang:{code}" for code in allowed_languages) + ")"

    # Only tweets from the last `days_back` days.
    since_date = (date.today() - timedelta(days=days_back)).strftime('%Y-%m-%d')

    return f"{hazard_query} {location_query} {lang_query} since:{since_date}"

def build_default_query():
    """
    Builds the default hazard + India coastal locations + language + date query.
    """
    # All-defaults delegation: default hazards, default locations, 1 day back.
    default_query = build_custom_query()
    return default_query

def fetch_hazard_tweets(limit=20):
    """
    Fetches tweets matching the default hazard query and returns extracted list.
    """
    # Build the default query, run the search, and normalize the response.
    raw_result = search_tweets(
        query=build_default_query(), query_type="Latest", limit=limit
    )
    return extract_tweets(raw_result)

def fetch_custom_tweets(hazard_type=None, location=None, limit=20, days_back=1):
    """
    Fetches tweets based on custom hazard type and location keywords.

    Args:
        hazard_type (str, optional): Specific hazard type to search for
        location (str, optional): Specific location to search for
        limit (int): Maximum number of tweets to fetch (default: 20)
        days_back (int): Number of days back to search (default: 1)

    Returns:
        list: List of extracted tweets
    """
    custom_query = build_custom_query(
        hazard_type=hazard_type, location=location, days_back=days_back
    )
    print(f"πŸ” Custom search query: {custom_query}")
    raw_result = search_tweets(query=custom_query, query_type="Latest", limit=limit)
    return extract_tweets(raw_result)

def get_available_hazards():
    """
    Returns a list of available hazard types for keyword search.
    """
    # Returned as a fresh list each call so callers may mutate it safely.
    hazard_types = (
        "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves",
        "swell", "coastal flooding", "rip current", "coastal erosion",
        "water discoloration", "algal bloom", "marine debris", "pollution",
    )
    return list(hazard_types)

def get_available_locations():
    """
    Returns a list of available locations for keyword search.
    """
    # Returned as a fresh list each call so callers may mutate it safely.
    location_names = (
        "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa",
        "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea",
        "Tamil Nadu", "Maharashtra", "Karnataka", "Andaman", "Nicobar", "Lakshadweep",
        "Kochi", "Cochin", "Mangaluru", "Mangalore", "Chandipur", "Paradip", "Digha", "Gopalpur",
    )
    return list(location_names)

if __name__ == "__main__":
    # Demo run: fetch the default hazard feed and pretty-print the results.
    results = fetch_hazard_tweets(limit=20)
    if results:
        print("\nExtracted tweets:")
        print(json.dumps(results, indent=2, ensure_ascii=False))