import requests
import json
from datetime import date, timedelta
from dotenv import load_dotenv
import os
# Load values from .env into environment
load_dotenv()
# Access the API key
# Stripped once here so downstream code can use it directly; empty string if unset.
API_KEY = os.getenv("TWITTER_API_KEY", "").strip()
def search_tweets(query, query_type="Latest", limit=20):
    """
    Search tweets via the twitterapi.io advanced-search endpoint.

    Args:
        query (str): Full advanced-search query string.
        query_type (str): Value for the API's ``queryType`` parameter
            (default ``"Latest"``).
        limit (int): Maximum number of tweets requested.

    Returns:
        dict | None: Parsed JSON response on HTTP 200, otherwise None
        (missing key, request exception, or non-200 status).
    """
    if not API_KEY:
        print("β Error: TWITTER_API_KEY not found or empty")
        return None
    url = "https://api.twitterapi.io/twitter/tweet/advanced_search"
    # API_KEY is already stripped at load time; no second .strip() needed.
    headers = {"X-API-Key": API_KEY}
    params = {"query": query, "queryType": query_type, "limit": limit}
    print(f"π Executing search with query: {query}")
    try:
        # Fix: explicit timeout so a stalled connection cannot hang the
        # caller indefinitely (requests has no default timeout).
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except Exception as e:
        print(f"β Request error: {e}")
        return None
    if response.status_code == 200:
        return response.json()
    print(f"Error: {response.status_code}")
    print(response.text)
    return None
def extract_tweets(result_json):
    """
    Normalize the raw API result into a flat list of tweet records.

    Each record is a dict with keys: tweet_url, location, created_at,
    text, hashtags. Returns an empty list for a missing/invalid result.
    """
    if not result_json or 'tweets' not in result_json:
        return []
    normalized = []
    for item in result_json.get('tweets', []):
        author = item.get('author', {})
        tag_entries = item.get('entities', {}).get('hashtags', [])
        normalized.append({
            'tweet_url': item.get('url'),
            'location': author.get('location', None),
            'created_at': item.get('createdAt'),
            'text': item.get('text'),
            'hashtags': [entry['text'] for entry in tag_entries],
        })
    return normalized
def build_custom_query(hazard_type=None, location=None, days_back=1):
    """
    Build a Twitter advanced-search query string.

    Args:
        hazard_type (str): Specific hazard keyword; defaults to a fixed
            OR-group of coastal hazard terms when omitted.
        location (str): Specific location keyword; defaults to a fixed
            OR-group of Indian coastal locations when omitted.
        days_back (int): How many days back the `since:` filter reaches
            (default: 1).

    Returns:
        str: The assembled query: hazard, location, language and date
        parts separated by single spaces.
    """
    default_hazards = [
        "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves",
        "swell", "coastal flooding", "rip current", "coastal erosion",
        "water discoloration", "algal bloom", "marine debris", "pollution"
    ]
    default_locations = [
        "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa",
        "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea"
    ]
    allowed_languages = [
        "as", "bn", "brx", "doi", "gu", "hi", "kn", "ks", "kok", "ml", "mni",
        "mr", "ne", "or", "pa", "sa", "sat", "sd", "ta", "te", "ur", "en", "bh"
    ]

    # Small helper: wrap a list of terms as an OR-joined parenthesized group.
    def or_group(terms):
        return "(" + " OR ".join(terms) + ")"

    # A caller-supplied keyword is used verbatim (quoted); otherwise fall
    # back to the full default OR-group.
    hazard_part = f'"{hazard_type}"' if hazard_type else or_group([f'"{h}"' for h in default_hazards])
    location_part = f'"{location}"' if location else or_group([f'"{loc}"' for loc in default_locations])
    language_part = or_group([f"lang:{code}" for code in allowed_languages])

    # Restrict results to tweets newer than `days_back` days ago.
    since_date = (date.today() - timedelta(days=days_back)).strftime('%Y-%m-%d')

    return f"{hazard_part} {location_part} {language_part} since:{since_date}"
def build_default_query():
    """Return the all-defaults query: hazards + Indian coastal locations + languages + date."""
    return build_custom_query()
def fetch_hazard_tweets(limit=20):
    """
    Run the default hazard query and return the normalized tweet list.
    """
    raw_result = search_tweets(query=build_default_query(), query_type="Latest", limit=limit)
    return extract_tweets(raw_result)
def fetch_custom_tweets(hazard_type=None, location=None, limit=20, days_back=1):
    """
    Fetch and normalize tweets for a caller-specified hazard/location.

    Args:
        hazard_type (str, optional): Specific hazard keyword.
        location (str, optional): Specific location keyword.
        limit (int): Maximum number of tweets to fetch (default: 20).
        days_back (int): Number of days back to search (default: 1).

    Returns:
        list: Normalized tweet dicts (empty on failure).
    """
    query = build_custom_query(hazard_type=hazard_type, location=location, days_back=days_back)
    print(f"π Custom search query: {query}")
    raw_result = search_tweets(query=query, query_type="Latest", limit=limit)
    return extract_tweets(raw_result)
def get_available_hazards():
    """Return the supported hazard keywords for custom searches."""
    hazards = (
        "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves",
        "swell", "coastal flooding", "rip current", "coastal erosion",
        "water discoloration", "algal bloom", "marine debris", "pollution",
    )
    return list(hazards)
def get_available_locations():
    """Return the supported location keywords for custom searches."""
    locations = (
        "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa",
        "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea",
        "Tamil Nadu", "Maharashtra", "Karnataka", "Andaman", "Nicobar", "Lakshadweep",
        "Kochi", "Cochin", "Mangaluru", "Mangalore", "Chandipur", "Paradip", "Digha", "Gopalpur",
    )
    return list(locations)
if __name__ == "__main__":
    # Quick manual check: run the default search and pretty-print results.
    results = fetch_hazard_tweets(limit=20)
    if results:
        print("\nExtracted tweets:")
        print(json.dumps(results, indent=2, ensure_ascii=False))