File size: 6,214 Bytes
49e67a8
 
 
 
 
 
 
 
 
 
 
b1d46da
49e67a8
 
 
 
 
 
b1d46da
 
 
 
49e67a8
b1d46da
49e67a8
 
 
b1d46da
 
 
 
 
49e67a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4d6026
49e67a8
f4d6026
 
 
 
 
 
 
 
 
49e67a8
f4d6026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49e67a8
 
f4d6026
49e67a8
 
f4d6026
 
 
 
 
 
 
49e67a8
 
f4d6026
 
 
 
 
 
49e67a8
 
 
 
 
 
 
 
f4d6026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49e67a8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import requests
import json
from datetime import date, timedelta

from dotenv import load_dotenv
import os

# Load key/value pairs from a local .env file into the process environment
# so the API key never needs to be hard-coded in source.
load_dotenv()

# twitterapi.io API key; .strip() guards against stray whitespace/newlines
# accidentally saved in the .env file. Empty string if the variable is unset.
API_KEY = os.getenv("TWITTER_API_KEY", "").strip()


def search_tweets(query, query_type="Latest", limit=20):
    """
    Searches for tweets using the twitterapi.io advanced search endpoint.

    Args:
        query (str): Full advanced-search query string.
        query_type (str): Result ordering requested from the API
            (default: "Latest").
        limit (int): Maximum number of tweets to request (default: 20).

    Returns:
        dict | None: Parsed JSON response on success; None when the API key
        is missing, the request fails, or the server returns a non-200 status.
    """
    if not API_KEY:
        print("❌ Error: TWITTER_API_KEY not found or empty")
        return None

    url = "https://api.twitterapi.io/twitter/tweet/advanced_search"
    # API_KEY is already stripped at module load; no need to strip again here.
    headers = {"X-API-Key": API_KEY}
    params = {"query": query, "queryType": query_type, "limit": limit}

    print(f"πŸ” Executing search with query: {query}")
    try:
        # timeout prevents the call from hanging forever on a stalled
        # connection; RequestException covers all requests-level failures
        # (DNS, connect, timeout, etc.) without masking unrelated bugs.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"❌ Request error: {e}")
        return None

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

def extract_tweets(result_json):
    """
    Extracts a normalized list of tweets from the API result.
    Returns a list of dicts with keys: tweet_url, location, created_at, text, hashtags
    """
    # Nothing to do when the response is missing or has no 'tweets' key.
    if not result_json or 'tweets' not in result_json:
        return []

    normalized = []
    for item in result_json.get('tweets', []):
        author = item.get('author', {})
        entities = item.get('entities', {})
        normalized.append({
            'tweet_url': item.get('url'),
            'location': author.get('location', None),
            'created_at': item.get('createdAt'),
            'text': item.get('text'),
            'hashtags': [h['text'] for h in entities.get('hashtags', [])],
        })
    return normalized

def build_custom_query(hazard_type=None, location=None, days_back=1):
    """
    Builds a custom query based on provided hazard type and location.

    Args:
        hazard_type (str): Specific hazard type to search for
        location (str): Specific location to search for
        days_back (int): Number of days back to search (default: 1)

    Returns:
        str: Custom search query
    """
    def _or_group(terms):
        # Render a parenthesized OR-group of double-quoted terms.
        return "(" + " OR ".join(f'"{term}"' for term in terms) + ")"

    # Default hazard keywords
    default_hazards = [
        "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves", 
        "swell", "coastal flooding", "rip current", "coastal erosion", 
        "water discoloration", "algal bloom", "marine debris", "pollution"
    ]

    # Default location keywords
    default_locations = [
        "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa",
        "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea"
    ]

    # A caller-supplied term is quoted as-is; otherwise fall back to the
    # full OR-group of defaults.
    hazard_query = f'"{hazard_type}"' if hazard_type else _or_group(default_hazards)
    location_query = f'"{location}"' if location else _or_group(default_locations)

    # Restrict results to Indian languages plus English.
    allowed_languages = [
        "as", "bn", "brx", "doi", "gu", "hi", "kn", "ks", "kok", "ml", "mni", 
        "mr", "ne", "or", "pa", "sa", "sat", "sd", "ta", "te", "ur", "en", "bh"
    ]
    lang_query = "(" + " OR ".join(f"lang:{code}" for code in allowed_languages) + ")"

    # Only tweets from the last `days_back` days.
    since_date = (date.today() - timedelta(days=days_back)).strftime('%Y-%m-%d')

    return f"{hazard_query} {location_query} {lang_query} since:{since_date}"

def build_default_query():
    """
    Builds the default hazard + India coastal locations + language + date query.
    """
    # All-defaults delegation: default hazards, default locations, 1 day back.
    default_query = build_custom_query()
    return default_query

def fetch_hazard_tweets(limit=20):
    """
    Fetches tweets matching the default hazard query and returns extracted list.
    """
    # Build the default query, run the search, and normalize the response.
    raw_result = search_tweets(
        query=build_default_query(), query_type="Latest", limit=limit
    )
    return extract_tweets(raw_result)

def fetch_custom_tweets(hazard_type=None, location=None, limit=20, days_back=1):
    """
    Fetches tweets based on custom hazard type and location keywords.

    Args:
        hazard_type (str, optional): Specific hazard type to search for
        location (str, optional): Specific location to search for
        limit (int): Maximum number of tweets to fetch (default: 20)
        days_back (int): Number of days back to search (default: 1)

    Returns:
        list: List of extracted tweets
    """
    custom_query = build_custom_query(
        hazard_type=hazard_type, location=location, days_back=days_back
    )
    print(f"πŸ” Custom search query: {custom_query}")
    raw_result = search_tweets(query=custom_query, query_type="Latest", limit=limit)
    return extract_tweets(raw_result)

def get_available_hazards():
    """
    Returns a list of available hazard types for keyword search.
    """
    # Returned as a fresh list each call so callers may mutate it safely.
    hazard_types = (
        "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves",
        "swell", "coastal flooding", "rip current", "coastal erosion",
        "water discoloration", "algal bloom", "marine debris", "pollution",
    )
    return list(hazard_types)

def get_available_locations():
    """
    Returns a list of available locations for keyword search.
    """
    # Returned as a fresh list each call so callers may mutate it safely.
    location_names = (
        "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa",
        "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea",
        "Tamil Nadu", "Maharashtra", "Karnataka", "Andaman", "Nicobar", "Lakshadweep",
        "Kochi", "Cochin", "Mangaluru", "Mangalore", "Chandipur", "Paradip", "Digha", "Gopalpur",
    )
    return list(location_names)

if __name__ == "__main__":
    # Demo run: fetch the default hazard feed and pretty-print the results.
    results = fetch_hazard_tweets(limit=20)
    if results:
        print("\nExtracted tweets:")
        print(json.dumps(results, indent=2, ensure_ascii=False))