Spaces:
Sleeping
Sleeping
Merge pull request #8 from boettiger-lab/feat/update_app
Browse files- app/app.py +8 -3
- app/system_prompt.txt +34 -0
- app/variables.py +91 -16
app/app.py
CHANGED
|
@@ -7,6 +7,7 @@ import ibis.selectors as s
|
|
| 7 |
from ibis import _
|
| 8 |
import ibis
|
| 9 |
import openai
|
|
|
|
| 10 |
|
| 11 |
st.set_page_config(layout="wide",
|
| 12 |
page_title="TPL Conservation Almanac",
|
|
@@ -82,6 +83,8 @@ with chatbot_container:
|
|
| 82 |
|
| 83 |
'''
|
| 84 |
Exploratory data queries:
|
|
|
|
|
|
|
| 85 |
- Which states have the highest average cost per acre?
|
| 86 |
'''
|
| 87 |
|
|
@@ -106,8 +109,10 @@ prompt = ChatPromptTemplate.from_messages([
|
|
| 106 |
("system", template),
|
| 107 |
("human", "{input}")
|
| 108 |
]).partial(dialect="duckdb", conservation_almanac = tpl_z8.schema(),
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
|
| 112 |
structured_llm = llm.with_structured_output(SQLResponse)
|
| 113 |
few_shot_structured_llm = prompt | structured_llm
|
|
@@ -214,7 +219,7 @@ if 'style' not in locals():
|
|
| 214 |
else:
|
| 215 |
# selected all states, so no need to filter
|
| 216 |
style=tpl_style_default(paint, pmtiles)
|
| 217 |
-
if '
|
| 218 |
bounds = llm_bounds
|
| 219 |
else:
|
| 220 |
bounds = get_bounds(state_choice, county_choice, m)
|
|
|
|
| 7 |
from ibis import _
|
| 8 |
import ibis
|
| 9 |
import openai
|
| 10 |
+
import pandas as pd
|
| 11 |
|
| 12 |
st.set_page_config(layout="wide",
|
| 13 |
page_title="TPL Conservation Almanac",
|
|
|
|
| 83 |
|
| 84 |
'''
|
| 85 |
Exploratory data queries:
|
| 86 |
+
- Which state senate districts have the highest percentage of protected areas?
|
| 87 |
+
- Which house legislative districts have the highest conservation investments?
|
| 88 |
- Which states have the highest average cost per acre?
|
| 89 |
'''
|
| 90 |
|
|
|
|
| 109 |
("system", template),
|
| 110 |
("human", "{input}")
|
| 111 |
]).partial(dialect="duckdb", conservation_almanac = tpl_z8.schema(),
|
| 112 |
+
landvote = landvote_z8.schema(), carbon = carbon_z8.schema(),
|
| 113 |
+
svi = svi_z8.schema(), mobi = mobi_z8.schema(),
|
| 114 |
+
lower_chamber = lower_chamber_z8.schema(), upper_chamber = upper_chamber_z8.schema())
|
| 115 |
+
|
| 116 |
|
| 117 |
structured_llm = llm.with_structured_output(SQLResponse)
|
| 118 |
few_shot_structured_llm = prompt | structured_llm
|
|
|
|
| 219 |
else:
|
| 220 |
# selected all states, so no need to filter
|
| 221 |
style=tpl_style_default(paint, pmtiles)
|
| 222 |
+
if 'llm_bounds' in locals():
|
| 223 |
bounds = llm_bounds
|
| 224 |
else:
|
| 225 |
bounds = get_bounds(state_choice, county_choice, m)
|
app/system_prompt.txt
CHANGED
|
@@ -22,6 +22,14 @@ svi:
|
|
| 22 |
- Definition: social vulnerability index; higher value indicates higher vulnerability. Make sure to only use svi values greater than 0.
|
| 23 |
- Schema: {svi}
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
For example:
|
| 27 |
{{
|
|
@@ -81,3 +89,29 @@ example_assistant: {{"sql_query":
|
|
| 81 |
"explanation":"I joined `conservation_almanac` with `richness` to retrieve species richness for protected areas in the `conservation_almanac`. I returned 100 areas with highest levels of carbon.
|
| 82 |
}}
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
- Definition: social vulnerability index; higher value indicates higher vulnerability. Make sure to only use svi values greater than 0.
|
| 23 |
- Schema: {svi}
|
| 24 |
|
| 25 |
+
lower_chamber:
|
| 26 |
+
- Definition: State House legislative districts for all U.S. states, also known as State Legislative District Lower (SLDL) or the lower chamber; excludes Nebraska and D.C., which have no lower chamber.
|
| 27 |
+
- Schema: {lower_chamber}
|
| 28 |
+
|
| 29 |
+
upper_chamber:
|
| 30 |
+
- Definition: State Senate legislative districts for all U.S. states, also known as State Legislative District Upper (SLDU) or the upper chamber.
|
| 31 |
+
- Schema: {upper_chamber}
|
| 32 |
+
|
| 33 |
|
| 34 |
For example:
|
| 35 |
{{
|
|
|
|
| 89 |
"explanation":"I joined `conservation_almanac` with `richness` to retrieve species richness for protected areas in the `conservation_almanac`. I returned 100 areas with highest levels of carbon.
|
| 90 |
}}
|
| 91 |
|
| 92 |
+
## Example:
|
| 93 |
+
example_user: "Which house legislative districts have the highest conservation investments?"
|
| 94 |
+
example_assistant: {{"sql_query":
|
| 95 |
+
SELECT ca.fid, ca.geom, ca.site, lc.NAMELSAD, SUM(ca.amount) as total_investment
|
| 96 |
+
FROM conservation_almanac as ca
|
| 97 |
+
LEFT JOIN lower_chamber as lc
|
| 98 |
+
USING (h8)
|
| 99 |
+
GROUP BY fid, geom, site, NAMELSAD
|
| 100 |
+
ORDER BY total_investment DESC LIMIT 100;
|
| 101 |
+
"explanation":"I intersected `lower_chamber` districts with `conservation_almanac` spending data to calculate total conservation investment within each district. I then returned the 100 districts with the highest investment levels."
|
| 102 |
+
}}
|
| 103 |
+
|
| 104 |
+
## Example:
|
| 105 |
+
example_user: "Which state senate districts have the highest percentage of protected areas?"
|
| 106 |
+
example_assistant: {{"sql_query":
|
| 107 |
+
SELECT uc.NAMELSAD, uc.STATEFP,
|
| 108 |
+
COUNT(DISTINCT ca.h8)::float / COUNT(DISTINCT uc.h8) AS fraction_protected
|
| 109 |
+
FROM upper_chamber AS uc
|
| 110 |
+
LEFT JOIN conservation_almanac AS ca
|
| 111 |
+
USING (h8)
|
| 112 |
+
GROUP BY uc.NAMELSAD, uc.STATEFP
|
| 113 |
+
ORDER BY fraction_protected DESC
|
| 114 |
+
LIMIT 20;
|
| 115 |
+
"explanation":"I joined `upper_chamber` (state senate districts) with `conservation_almanac` on the `h8` column. Since each `h8` hex is the same size, the fraction of a district that is protected can be calculated as the number of unique `h8` hexes with protected areas divided by the total number of `h8` hexes in the district. The query returns the 20 districts with the highest fraction of protected land."
|
| 116 |
+
}}
|
| 117 |
+
|
app/variables.py
CHANGED
|
@@ -13,7 +13,7 @@ duckdb_install_h3()
|
|
| 13 |
|
| 14 |
con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
|
| 15 |
con.raw_sql("SET THREADS=100;")
|
| 16 |
-
set_secrets(con)
|
| 17 |
|
| 18 |
# Get signed URLs to access license-controlled layers
|
| 19 |
key = st.secrets["MINIO_KEY"]
|
|
@@ -30,6 +30,8 @@ county_bounds_url = "https://minio.carlboettiger.info/public-census/2024/county/
|
|
| 30 |
mobi_z8_url = "https://minio.carlboettiger.info/public-mobi/hex/all-richness-h8.parquet"
|
| 31 |
svi_z8_url = "https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract_h3_z8.parquet"
|
| 32 |
carbon_z8_url = "https://minio.carlboettiger.info/public-carbon/hex/us-tracts-vuln-total-carbon-2018-h8.parquet"
|
|
|
|
|
|
|
| 33 |
|
| 34 |
tpl_z8 = con.read_parquet(tpl_z8_url, table_name = 'conservation_almanac')
|
| 35 |
landvote_z8 = con.read_parquet(landvote_z8_url, table_name = 'landvote')
|
|
@@ -40,6 +42,8 @@ county_bounds = con.read_parquet(county_bounds_url)
|
|
| 40 |
mobi_z8 = con.read_parquet(mobi_z8_url, table_name = 'mobi')
|
| 41 |
svi_z8 = con.read_parquet(svi_z8_url,table_name = 'svi')
|
| 42 |
carbon_z8 = con.read_parquet(carbon_z8_url, table_name = 'carbon')
|
|
|
|
|
|
|
| 43 |
|
| 44 |
states = (
|
| 45 |
"All", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
|
|
@@ -162,7 +166,6 @@ basemaps = ['CartoDB.DarkMatter', 'CartoDB.DarkMatterNoLabels',
|
|
| 162 |
help_message = '''
|
| 163 |
- ❌ Safari/iOS not fully supported. For Safari/iOS users, change the **Leafmap module** from MapLibre to Folium in **(Map Settings)** below.
|
| 164 |
- 📊 Use this sidebar to color-code the map by different attributes **(Group by)**
|
| 165 |
-
- 💬 For a more tailored experience, query our dataset of protected areas and their precomputed metrics for each of the displayed layers, using the experimental chatbot. The language model tries to answer natural language questions by drawing only from curated datasets (listed below).
|
| 166 |
'''
|
| 167 |
|
| 168 |
#maplibregl tooltip
|
|
@@ -253,18 +256,90 @@ openrouter_api = os.getenv("OPENROUTER_API_KEY")
|
|
| 253 |
if openrouter_api is None:
|
| 254 |
openrouter_api = st.secrets["OPENROUTER_API_KEY"]
|
| 255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
llm_options = {
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
"
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
|
| 15 |
con.raw_sql("SET THREADS=100;")
|
| 16 |
+
set_secrets(con, "", "", "minio.carlboettiger.info")
|
| 17 |
|
| 18 |
# Get signed URLs to access license-controlled layers
|
| 19 |
key = st.secrets["MINIO_KEY"]
|
|
|
|
| 30 |
mobi_z8_url = "https://minio.carlboettiger.info/public-mobi/hex/all-richness-h8.parquet"
|
| 31 |
svi_z8_url = "https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract_h3_z8.parquet"
|
| 32 |
carbon_z8_url = "https://minio.carlboettiger.info/public-carbon/hex/us-tracts-vuln-total-carbon-2018-h8.parquet"
|
| 33 |
+
lower_chamber_z8_url = "s3://public-census/2024/sld/lower/z8/**"
|
| 34 |
+
upper_chamber_z8_url = "s3://public-census/2024/sld/upper/z8/**"
|
| 35 |
|
| 36 |
tpl_z8 = con.read_parquet(tpl_z8_url, table_name = 'conservation_almanac')
|
| 37 |
landvote_z8 = con.read_parquet(landvote_z8_url, table_name = 'landvote')
|
|
|
|
| 42 |
mobi_z8 = con.read_parquet(mobi_z8_url, table_name = 'mobi')
|
| 43 |
svi_z8 = con.read_parquet(svi_z8_url,table_name = 'svi')
|
| 44 |
carbon_z8 = con.read_parquet(carbon_z8_url, table_name = 'carbon')
|
| 45 |
+
lower_chamber_z8 = con.read_parquet(lower_chamber_z8_url, table_name = 'lower_chamber')
|
| 46 |
+
upper_chamber_z8 = con.read_parquet(upper_chamber_z8_url, table_name = 'upper_chamber')
|
| 47 |
|
| 48 |
states = (
|
| 49 |
"All", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
|
|
|
|
| 166 |
help_message = '''
|
| 167 |
- ❌ Safari/iOS not fully supported. For Safari/iOS users, change the **Leafmap module** from MapLibre to Folium in **(Map Settings)** below.
|
| 168 |
- 📊 Use this sidebar to color-code the map by different attributes **(Group by)**
|
|
|
|
| 169 |
'''
|
| 170 |
|
| 171 |
#maplibregl tooltip
|
|
|
|
| 256 |
if openrouter_api is None:
|
| 257 |
openrouter_api = st.secrets["OPENROUTER_API_KEY"]
|
| 258 |
|
| 259 |
+
openrouter_endpoint="https://openrouter.ai/api/v1"
|
| 260 |
+
nrp_endpoint="https://ellm.nrp-nautilus.io/v1"
|
| 261 |
+
|
| 262 |
+
# don't use a provider that collects data
|
| 263 |
+
data_policy = {
|
| 264 |
+
"provider": {
|
| 265 |
+
"data_collection": "deny"
|
| 266 |
+
}
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
llm_options = {
|
| 270 |
+
"kat-coder-pro": ChatOpenAI(
|
| 271 |
+
model="kwaipilot/kat-coder-pro:free",
|
| 272 |
+
api_key=openrouter_api,
|
| 273 |
+
base_url=openrouter_endpoint,
|
| 274 |
+
temperature=0,
|
| 275 |
+
extra_body=data_policy
|
| 276 |
+
),
|
| 277 |
+
|
| 278 |
+
"llama-3.3-70b-instruct": ChatOpenAI(
|
| 279 |
+
model="meta-llama/llama-3.3-70b-instruct:free",
|
| 280 |
+
api_key=openrouter_api,
|
| 281 |
+
base_url=openrouter_endpoint,
|
| 282 |
+
temperature=0,
|
| 283 |
+
extra_body=data_policy
|
| 284 |
+
),
|
| 285 |
+
|
| 286 |
+
"gpt-oss-20b": ChatOpenAI(
|
| 287 |
+
model="openai/gpt-oss-20b:free",
|
| 288 |
+
api_key=openrouter_api,
|
| 289 |
+
base_url=openrouter_endpoint,
|
| 290 |
+
temperature=0,
|
| 291 |
+
extra_body=data_policy
|
| 292 |
+
),
|
| 293 |
+
|
| 294 |
+
"qwen3-coder": ChatOpenAI(
|
| 295 |
+
model="qwen/qwen3-coder:free",
|
| 296 |
+
api_key=openrouter_api,
|
| 297 |
+
base_url=openrouter_endpoint,
|
| 298 |
+
temperature=0,
|
| 299 |
+
extra_body=data_policy
|
| 300 |
+
),
|
| 301 |
+
|
| 302 |
+
"dolphin-mistral-24b-venice-edition": ChatOpenAI(
|
| 303 |
+
model="cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
| 304 |
+
api_key=openrouter_api,
|
| 305 |
+
base_url=openrouter_endpoint,
|
| 306 |
+
temperature=0,
|
| 307 |
+
extra_body=data_policy
|
| 308 |
+
),
|
| 309 |
+
|
| 310 |
+
"nemotron-nano-9b-v2": ChatOpenAI(
|
| 311 |
+
model="nvidia/nemotron-nano-9b-v2:free",
|
| 312 |
+
api_key=openrouter_api,
|
| 313 |
+
base_url=openrouter_endpoint,
|
| 314 |
+
temperature=0,
|
| 315 |
+
extra_body=data_policy
|
| 316 |
+
),
|
| 317 |
+
|
| 318 |
+
"gemma-3-27b-it": ChatOpenAI(
|
| 319 |
+
model="gemma3",
|
| 320 |
+
api_key=api_key,
|
| 321 |
+
base_url=nrp_endpoint,
|
| 322 |
+
temperature=0
|
| 323 |
+
),
|
| 324 |
+
|
| 325 |
+
"gpt-oss-120b": ChatOpenAI(
|
| 326 |
+
model="gpt-oss",
|
| 327 |
+
api_key=api_key,
|
| 328 |
+
base_url=nrp_endpoint,
|
| 329 |
+
temperature=0
|
| 330 |
+
),
|
| 331 |
+
|
| 332 |
+
"glm-4.6-gptq-int4-int8mix": ChatOpenAI(
|
| 333 |
+
model="glm-4.6",
|
| 334 |
+
api_key=api_key,
|
| 335 |
+
base_url=nrp_endpoint,
|
| 336 |
+
temperature=0
|
| 337 |
+
),
|
| 338 |
+
|
| 339 |
+
"glm-4.5v-fp8": ChatOpenAI(
|
| 340 |
+
model="glm-v",
|
| 341 |
+
api_key=api_key,
|
| 342 |
+
base_url=nrp_endpoint,
|
| 343 |
+
temperature=0
|
| 344 |
+
),
|
| 345 |
+
}
|