Cassie Buhler committed on
Commit
2283253
·
unverified ·
2 Parent(s): 24714e3 5b58f95

Merge pull request #8 from boettiger-lab/feat/update_app

Browse files
Files changed (3) hide show
  1. app/app.py +8 -3
  2. app/system_prompt.txt +34 -0
  3. app/variables.py +91 -16
app/app.py CHANGED
@@ -7,6 +7,7 @@ import ibis.selectors as s
7
  from ibis import _
8
  import ibis
9
  import openai
 
10
 
11
  st.set_page_config(layout="wide",
12
  page_title="TPL Conservation Almanac",
@@ -82,6 +83,8 @@ with chatbot_container:
82
 
83
  '''
84
  Exploratory data queries:
 
 
85
  - Which states have the highest average cost per acre?
86
  '''
87
 
@@ -106,8 +109,10 @@ prompt = ChatPromptTemplate.from_messages([
106
  ("system", template),
107
  ("human", "{input}")
108
  ]).partial(dialect="duckdb", conservation_almanac = tpl_z8.schema(),
109
- landvote = landvote_z8.schema(), carbon = carbon_z8.schema(),
110
- svi = svi_z8.schema(), mobi = mobi_z8.schema())
 
 
111
 
112
  structured_llm = llm.with_structured_output(SQLResponse)
113
  few_shot_structured_llm = prompt | structured_llm
@@ -214,7 +219,7 @@ if 'style' not in locals():
214
  else:
215
  # selected all states, so no need to filter
216
  style=tpl_style_default(paint, pmtiles)
217
- if 'llm_output' in locals():
218
  bounds = llm_bounds
219
  else:
220
  bounds = get_bounds(state_choice, county_choice, m)
 
7
  from ibis import _
8
  import ibis
9
  import openai
10
+ import pandas as pd
11
 
12
  st.set_page_config(layout="wide",
13
  page_title="TPL Conservation Almanac",
 
83
 
84
  '''
85
  Exploratory data queries:
86
+ - Which state senate districts have the highest percentage of protected areas?
87
+ - Which house legislative districts have the highest conservation investments?
88
  - Which states have the highest average cost per acre?
89
  '''
90
 
 
109
  ("system", template),
110
  ("human", "{input}")
111
  ]).partial(dialect="duckdb", conservation_almanac = tpl_z8.schema(),
112
+ landvote = landvote_z8.schema(), carbon = carbon_z8.schema(),
113
+ svi = svi_z8.schema(), mobi = mobi_z8.schema(),
114
+ lower_chamber = lower_chamber_z8.schema(), upper_chamber = upper_chamber_z8.schema())
115
+
116
 
117
  structured_llm = llm.with_structured_output(SQLResponse)
118
  few_shot_structured_llm = prompt | structured_llm
 
219
  else:
220
  # selected all states, so no need to filter
221
  style=tpl_style_default(paint, pmtiles)
222
+ if 'llm_bounds' in locals():
223
  bounds = llm_bounds
224
  else:
225
  bounds = get_bounds(state_choice, county_choice, m)
app/system_prompt.txt CHANGED
@@ -22,6 +22,14 @@ svi:
22
  - Definition: social vulnerability index; higher value indicates higher vulnerability. Make sure to only use svi values greater than 0.
23
  - Schema: {svi}
24
 
 
 
 
 
 
 
 
 
25
 
26
  For example:
27
  {{
@@ -81,3 +89,29 @@ example_assistant: {{"sql_query":
81
  "explanation":"I joined `conservation_almanac` with `richness` to retrieve species richness for protected areas in the `conservation_almanac`. I returned 100 areas with highest levels of carbon."
82
  }}
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  - Definition: social vulnerability index; higher value indicates higher vulnerability. Make sure to only use svi values greater than 0.
23
  - Schema: {svi}
24
 
25
+ lower_chamber:
26
+ - Definition: State House legislative districts for all U.S. states, also known as State Legislative District Lower (SLDL) or the lower chamber; excludes Nebraska and D.C., which have no lower chamber.
27
+ - Schema: {lower_chamber}
28
+
29
+ upper_chamber:
30
+ - Definition: State Senate legislative districts for all U.S. states, also known as State Legislative District Upper (SLDU) or the upper chamber.
31
+ - Schema: {upper_chamber}
32
+
33
 
34
  For example:
35
  {{
 
89
  "explanation":"I joined `conservation_almanac` with `richness` to retrieve species richness for protected areas in the `conservation_almanac`. I returned 100 areas with highest levels of carbon."
90
  }}
91
 
92
+ ## Example:
93
+ example_user: "Which house legislative districts have the highest conservation investments?"
94
+ example_assistant: {{"sql_query":
95
+ SELECT ca.fid, ca.geom, ca.site, lc.NAMELSAD, SUM(ca.amount) as total_investment
96
+ FROM conservation_almanac as ca
97
+ LEFT JOIN lower_chamber as lc
98
+ USING (h8)
99
+ GROUP BY fid, geom, site, NAMELSAD
100
+ ORDER BY total_investment DESC LIMIT 100;
101
+ "explanation":"I intersected `lower_chamber` districts with `conservation_almanac` spending data to calculate total conservation investment within each district. I then returned the 100 districts with the highest investment levels."
102
+ }}
103
+
104
+ ## Example:
105
+ example_user: "Which state senate districts have the highest percentage of protected areas?"
106
+ example_assistant: {{"sql_query":
107
+ SELECT uc.NAMELSAD, uc.STATEFP,
108
+ COUNT(DISTINCT ca.h8)::float / COUNT(DISTINCT uc.h8) AS fraction_protected
109
+ FROM upper_chamber AS uc
110
+ LEFT JOIN conservation_almanac AS ca
111
+ USING (h8)
112
+ GROUP BY uc.NAMELSAD, uc.STATEFP
113
+ ORDER BY fraction_protected DESC
114
+ LIMIT 20;
115
+ "explanation":"I joined `upper_chamber` with `conservation_almanac` on the `h8` column. Since each `h8` hex is the same size, the fraction of a district that is protected can be calculated as the number of unique `h8` hexes with protected areas divided by the total number of `h8` hexes in the district. The query returns the 20 districts with the highest fraction of protected land."
116
+ }}
117
+
app/variables.py CHANGED
@@ -13,7 +13,7 @@ duckdb_install_h3()
13
 
14
  con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
15
  con.raw_sql("SET THREADS=100;")
16
- set_secrets(con)
17
 
18
  # Get signed URLs to access license-controlled layers
19
  key = st.secrets["MINIO_KEY"]
@@ -30,6 +30,8 @@ county_bounds_url = "https://minio.carlboettiger.info/public-census/2024/county/
30
  mobi_z8_url = "https://minio.carlboettiger.info/public-mobi/hex/all-richness-h8.parquet"
31
  svi_z8_url = "https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract_h3_z8.parquet"
32
  carbon_z8_url = "https://minio.carlboettiger.info/public-carbon/hex/us-tracts-vuln-total-carbon-2018-h8.parquet"
 
 
33
 
34
  tpl_z8 = con.read_parquet(tpl_z8_url, table_name = 'conservation_almanac')
35
  landvote_z8 = con.read_parquet(landvote_z8_url, table_name = 'landvote')
@@ -40,6 +42,8 @@ county_bounds = con.read_parquet(county_bounds_url)
40
  mobi_z8 = con.read_parquet(mobi_z8_url, table_name = 'mobi')
41
  svi_z8 = con.read_parquet(svi_z8_url,table_name = 'svi')
42
  carbon_z8 = con.read_parquet(carbon_z8_url, table_name = 'carbon')
 
 
43
 
44
  states = (
45
  "All", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
@@ -162,7 +166,6 @@ basemaps = ['CartoDB.DarkMatter', 'CartoDB.DarkMatterNoLabels',
162
  help_message = '''
163
  - ❌ Safari/iOS not fully supported. For Safari/iOS users, change the **Leafmap module** from MapLibre to Folium in **(Map Settings)** below.
164
  - 📊 Use this sidebar to color-code the map by different attributes **(Group by)**
165
- - 💬 For a more tailored experience, query our dataset of protected areas and their precomputed metrics for each of the displayed layers, using the experimental chatbot. The language model tries to answer natural language questions by drawing only from curated datasets (listed below).
166
  '''
167
 
168
  #maplibregl tooltip
@@ -253,18 +256,90 @@ openrouter_api = os.getenv("OPENROUTER_API_KEY")
253
  if openrouter_api is None:
254
  openrouter_api = st.secrets["OPENROUTER_API_KEY"]
255
 
 
 
 
 
 
 
 
 
 
 
256
  llm_options = {
257
- "gpt-oss-20b": ChatOpenAI(model = "openai/gpt-oss-20b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
258
- "mistral-small-3.2-24b-instruct": ChatOpenAI(model = "mistralai/mistral-small-3.2-24b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
259
- "devstral-small-2505": ChatOpenAI(model = "mistralai/devstral-small-2505:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
260
- "deepseek-r1t2-chimera": ChatOpenAI(model = "tngtech/deepseek-r1t2-chimera:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
261
- "kimi-dev-72b": ChatOpenAI(model = "moonshotai/kimi-dev-72b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
262
- "hunyuan-a13b-instruct": ChatOpenAI(model = "tencent/hunyuan-a13b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
263
- # "deepseek-chat-v3-0324": ChatOpenAI(model = "deepseek/deepseek-chat-v3-0324:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
264
- "olmo": ChatOpenAI(model = "olmo", api_key=api_key, base_url = "https://llm.nrp-nautilus.io/", temperature=0),
265
- "llama3": ChatOpenAI(model = "llama3", api_key=api_key, base_url = "https://llm.nrp-nautilus.io/", temperature=0),
266
- # "deepseek-r1": BaseChatOpenAI(model = "deepseek-r1", api_key=api_key, base_url = "https://llm.nrp-nautilus.io/", temperature=0),
267
- "qwen3": ChatOpenAI(model = "qwen3", api_key=api_key, base_url = "https://llm.nrp-nautilus.io/", temperature=0),
268
- "gemma3": ChatOpenAI(model = "gemma3", api_key=api_key, base_url = "https://llm.nrp-nautilus.io/", temperature=0),
269
-
270
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
15
  con.raw_sql("SET THREADS=100;")
16
+ set_secrets(con, "", "", "minio.carlboettiger.info")
17
 
18
  # Get signed URLs to access license-controlled layers
19
  key = st.secrets["MINIO_KEY"]
 
30
  mobi_z8_url = "https://minio.carlboettiger.info/public-mobi/hex/all-richness-h8.parquet"
31
  svi_z8_url = "https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract_h3_z8.parquet"
32
  carbon_z8_url = "https://minio.carlboettiger.info/public-carbon/hex/us-tracts-vuln-total-carbon-2018-h8.parquet"
33
+ lower_chamber_z8_url = "s3://public-census/2024/sld/lower/z8/**"
34
+ upper_chamber_z8_url = "s3://public-census/2024/sld/upper/z8/**"
35
 
36
  tpl_z8 = con.read_parquet(tpl_z8_url, table_name = 'conservation_almanac')
37
  landvote_z8 = con.read_parquet(landvote_z8_url, table_name = 'landvote')
 
42
  mobi_z8 = con.read_parquet(mobi_z8_url, table_name = 'mobi')
43
  svi_z8 = con.read_parquet(svi_z8_url,table_name = 'svi')
44
  carbon_z8 = con.read_parquet(carbon_z8_url, table_name = 'carbon')
45
+ lower_chamber_z8 = con.read_parquet(lower_chamber_z8_url, table_name = 'lower_chamber')
46
+ upper_chamber_z8 = con.read_parquet(upper_chamber_z8_url, table_name = 'upper_chamber')
47
 
48
  states = (
49
  "All", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
 
166
  help_message = '''
167
  - ❌ Safari/iOS not fully supported. For Safari/iOS users, change the **Leafmap module** from MapLibre to Folium in **(Map Settings)** below.
168
  - 📊 Use this sidebar to color-code the map by different attributes **(Group by)**
 
169
  '''
170
 
171
  #maplibregl tooltip
 
256
  if openrouter_api is None:
257
  openrouter_api = st.secrets["OPENROUTER_API_KEY"]
258
 
259
+ openrouter_endpoint="https://openrouter.ai/api/v1"
260
+ nrp_endpoint="https://ellm.nrp-nautilus.io/v1"
261
+
262
+ # don't use a provider that collects data
263
+ data_policy = {
264
+ "provider": {
265
+ "data_collection": "deny"
266
+ }
267
+ }
268
+
269
  llm_options = {
270
+ "kat-coder-pro": ChatOpenAI(
271
+ model="kwaipilot/kat-coder-pro:free",
272
+ api_key=openrouter_api,
273
+ base_url=openrouter_endpoint,
274
+ temperature=0,
275
+ extra_body=data_policy
276
+ ),
277
+
278
+ "llama-3.3-70b-instruct": ChatOpenAI(
279
+ model="meta-llama/llama-3.3-70b-instruct:free",
280
+ api_key=openrouter_api,
281
+ base_url=openrouter_endpoint,
282
+ temperature=0,
283
+ extra_body=data_policy
284
+ ),
285
+
286
+ "gpt-oss-20b": ChatOpenAI(
287
+ model="openai/gpt-oss-20b:free",
288
+ api_key=openrouter_api,
289
+ base_url=openrouter_endpoint,
290
+ temperature=0,
291
+ extra_body=data_policy
292
+ ),
293
+
294
+ "qwen3-coder": ChatOpenAI(
295
+ model="qwen/qwen3-coder:free",
296
+ api_key=openrouter_api,
297
+ base_url=openrouter_endpoint,
298
+ temperature=0,
299
+ extra_body=data_policy
300
+ ),
301
+
302
+ "dolphin-mistral-24b-venice-edition": ChatOpenAI(
303
+ model="cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
304
+ api_key=openrouter_api,
305
+ base_url=openrouter_endpoint,
306
+ temperature=0,
307
+ extra_body=data_policy
308
+ ),
309
+
310
+ "nemotron-nano-9b-v2": ChatOpenAI(
311
+ model="nvidia/nemotron-nano-9b-v2:free",
312
+ api_key=openrouter_api,
313
+ base_url=openrouter_endpoint,
314
+ temperature=0,
315
+ extra_body=data_policy
316
+ ),
317
+
318
+ "gemma-3-27b-it": ChatOpenAI(
319
+ model="gemma3",
320
+ api_key=api_key,
321
+ base_url=nrp_endpoint,
322
+ temperature=0
323
+ ),
324
+
325
+ "gpt-oss-120b": ChatOpenAI(
326
+ model="gpt-oss",
327
+ api_key=api_key,
328
+ base_url=nrp_endpoint,
329
+ temperature=0
330
+ ),
331
+
332
+ "glm-4.6-gptq-int4-int8mix": ChatOpenAI(
333
+ model="glm-4.6",
334
+ api_key=api_key,
335
+ base_url=nrp_endpoint,
336
+ temperature=0
337
+ ),
338
+
339
+ "glm-4.5v-fp8": ChatOpenAI(
340
+ model="glm-v",
341
+ api_key=api_key,
342
+ base_url=nrp_endpoint,
343
+ temperature=0
344
+ ),
345
+ }