Spaces:
Sleeping
Sleeping
Commit
·
9adb68a
1
Parent(s):
08ae6d1
let's see if this works...
Browse files
chatbot has multiple datasets now. Also, pmtiles only shows up in the notebook -- not sure if this will work
- .gitignore +5 -0
- app/app.py +24 -17
- app/system_prompt.txt +53 -9
- app/utils.py +22 -22
- app/variables.py +22 -66
.gitignore
CHANGED
|
@@ -9,3 +9,8 @@
|
|
| 9 |
*.vrt
|
| 10 |
.streamlit
|
| 11 |
__pycache__
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
*.vrt
|
| 10 |
.streamlit
|
| 11 |
__pycache__
|
| 12 |
+
*.block
|
| 13 |
+
*.tmp
|
| 14 |
+
*.parquet
|
| 15 |
+
*.geojson
|
| 16 |
+
**/*.shp
|
app/app.py
CHANGED
|
@@ -4,13 +4,28 @@ from cng.h3 import *
|
|
| 4 |
from utils import *
|
| 5 |
from ibis import _
|
| 6 |
|
| 7 |
-
current_tables = con.list_tables()
|
| 8 |
|
| 9 |
-
if "
|
| 10 |
-
|
| 11 |
|
| 12 |
-
|
|
|
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
st.set_page_config(layout="wide",
|
| 16 |
page_title="TPL Conservation Almanac",
|
|
@@ -25,7 +40,6 @@ basemaps = leafmap.basemaps.keys()
|
|
| 25 |
|
| 26 |
m = leafmap.Map(style = "positron")
|
| 27 |
|
| 28 |
-
|
| 29 |
from datetime import time
|
| 30 |
|
| 31 |
with st.sidebar:
|
|
@@ -93,7 +107,9 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
| 93 |
prompt = ChatPromptTemplate.from_messages([
|
| 94 |
("system", template),
|
| 95 |
("human", "{input}")
|
| 96 |
-
]).partial(dialect="duckdb",
|
|
|
|
|
|
|
| 97 |
|
| 98 |
# chatbot_toggles = {key: False for key in keys}
|
| 99 |
structured_llm = llm.with_structured_output(SQLResponse)
|
|
@@ -115,7 +131,7 @@ def run_sql(query,paint):
|
|
| 115 |
st.success(explanation)
|
| 116 |
return pd.DataFrame({'fid' : []})
|
| 117 |
|
| 118 |
-
result =
|
| 119 |
if result.empty :
|
| 120 |
explanation = "This query did not return any results. Please try again with a different query."
|
| 121 |
st.warning(explanation, icon="⚠️")
|
|
@@ -127,9 +143,6 @@ def run_sql(query,paint):
|
|
| 127 |
return result
|
| 128 |
elif ("fid" and "geom" in result.columns):
|
| 129 |
style = tpl_style(result["fid"].tolist(), paint)
|
| 130 |
-
# legend, position, bg_color, fontsize = get_legend(paint)
|
| 131 |
-
|
| 132 |
-
# m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
|
| 133 |
m.add_pmtiles(pmtiles, style=style, opacity=0.5, tooltip=True, fit_bounds=True)
|
| 134 |
m.fit_bounds(result.total_bounds.tolist())
|
| 135 |
result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
|
|
@@ -180,14 +193,8 @@ if 'out' not in locals():
|
|
| 180 |
m.add_pmtiles(pmtiles, style=tpl_style(unique_ids, paint), opacity=0.5, tooltip=True, fit_bounds=True)
|
| 181 |
else:
|
| 182 |
m.add_pmtiles(pmtiles, style=tpl_style_default(paint), opacity=0.5, tooltip=True, fit_bounds=True)
|
| 183 |
-
|
| 184 |
-
# legend, position, bg_color, fontsize = get_legend(paint)
|
| 185 |
-
# m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
|
| 186 |
-
#zoom to state(s)
|
| 187 |
fit_bounds(state_choice, county_choice, m)
|
| 188 |
|
| 189 |
-
## Render display panels
|
| 190 |
-
#
|
| 191 |
|
| 192 |
m.to_streamlit()
|
| 193 |
with st.expander("🔍 View/download data"): # adding data table
|
|
@@ -216,7 +223,7 @@ with col1:
|
|
| 216 |
get_bar(gdf_tpl, style_choice, 'year', 'total_amount', paint,'Year','Acquisition Cost ($)',"Yearly investment ($) in protected area")
|
| 217 |
|
| 218 |
with col2:
|
| 219 |
-
gdf_landvote = group_data(gdf_landvote.filter(_.
|
| 220 |
get_bar(gdf_landvote, style_choice, 'year', 'total_amount', paint, 'Year','Funds Approved ($)','Yearly funds from conservation ballot measures')
|
| 221 |
|
| 222 |
st.divider()
|
|
|
|
| 4 |
from utils import *
|
| 5 |
from ibis import _
|
| 6 |
|
| 7 |
+
# current_tables = con.list_tables()
|
| 8 |
|
| 9 |
+
# if "conservation_almanac" not in set(current_tables):
|
| 10 |
+
# con.create_table("conservation_almanac", tpl_z8)
|
| 11 |
|
| 12 |
+
# if "landvote" not in set(current_tables):
|
| 13 |
+
# con.create_table("landvote", landvote_z8)
|
| 14 |
|
| 15 |
+
# if "carbon" not in set(current_tables):
|
| 16 |
+
# con.create_table("carbon", carbon_z8)
|
| 17 |
+
|
| 18 |
+
# if "mobi" not in set(current_tables):
|
| 19 |
+
# con.create_table("mobi", mobi_z8)
|
| 20 |
+
|
| 21 |
+
# if "svi" not in set(current_tables):
|
| 22 |
+
# con.create_table("svi", svi_z8)
|
| 23 |
+
|
| 24 |
+
# conservation_almanac = con.table("conservation_almanac")
|
| 25 |
+
# landvote = con.table("landvote")
|
| 26 |
+
# carbon = con.table("carbon")
|
| 27 |
+
# mobi = con.table("mobi")
|
| 28 |
+
# svi = con.table("svi")
|
| 29 |
|
| 30 |
st.set_page_config(layout="wide",
|
| 31 |
page_title="TPL Conservation Almanac",
|
|
|
|
| 40 |
|
| 41 |
m = leafmap.Map(style = "positron")
|
| 42 |
|
|
|
|
| 43 |
from datetime import time
|
| 44 |
|
| 45 |
with st.sidebar:
|
|
|
|
| 107 |
prompt = ChatPromptTemplate.from_messages([
|
| 108 |
("system", template),
|
| 109 |
("human", "{input}")
|
| 110 |
+
]).partial(dialect="duckdb", conservation_almanac = tpl_z8.schema(),
|
| 111 |
+
landvote = landvote_z8.schema(), carbon = carbon_z8.schema(),
|
| 112 |
+
svi = svi_z8.schema(), mobi = mobi_z8.schema())
|
| 113 |
|
| 114 |
# chatbot_toggles = {key: False for key in keys}
|
| 115 |
structured_llm = llm.with_structured_output(SQLResponse)
|
|
|
|
| 131 |
st.success(explanation)
|
| 132 |
return pd.DataFrame({'fid' : []})
|
| 133 |
|
| 134 |
+
result = con.sql(sql_query).distinct().execute()
|
| 135 |
if result.empty :
|
| 136 |
explanation = "This query did not return any results. Please try again with a different query."
|
| 137 |
st.warning(explanation, icon="⚠️")
|
|
|
|
| 143 |
return result
|
| 144 |
elif ("fid" and "geom" in result.columns):
|
| 145 |
style = tpl_style(result["fid"].tolist(), paint)
|
|
|
|
|
|
|
|
|
|
| 146 |
m.add_pmtiles(pmtiles, style=style, opacity=0.5, tooltip=True, fit_bounds=True)
|
| 147 |
m.fit_bounds(result.total_bounds.tolist())
|
| 148 |
result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
|
|
|
|
| 193 |
m.add_pmtiles(pmtiles, style=tpl_style(unique_ids, paint), opacity=0.5, tooltip=True, fit_bounds=True)
|
| 194 |
else:
|
| 195 |
m.add_pmtiles(pmtiles, style=tpl_style_default(paint), opacity=0.5, tooltip=True, fit_bounds=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
fit_bounds(state_choice, county_choice, m)
|
| 197 |
|
|
|
|
|
|
|
| 198 |
|
| 199 |
m.to_streamlit()
|
| 200 |
with st.expander("🔍 View/download data"): # adding data table
|
|
|
|
| 223 |
get_bar(gdf_tpl, style_choice, 'year', 'total_amount', paint,'Year','Acquisition Cost ($)',"Yearly investment ($) in protected area")
|
| 224 |
|
| 225 |
with col2:
|
| 226 |
+
gdf_landvote = group_data(gdf_landvote.filter(_.status == 'Pass'), 'Measure Cost')
|
| 227 |
get_bar(gdf_landvote, style_choice, 'year', 'total_amount', paint, 'Year','Funds Approved ($)','Yearly funds from conservation ballot measures')
|
| 228 |
|
| 229 |
st.divider()
|
app/system_prompt.txt
CHANGED
|
@@ -1,4 +1,27 @@
|
|
| 1 |
-
You are an expert in SQL and an assistant for mapping and analyzing the Trust for Public Land (TPL) data.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
For example:
|
| 4 |
{{
|
|
@@ -8,11 +31,9 @@ For example:
|
|
| 8 |
|
| 9 |
Ensure the response contains only this JSON object, with no additional text, formatting, or commentary.
|
| 10 |
|
| 11 |
-
ONLY write SQL queries using the records and columns that exist in the table called mydata. This is the schema for mydata: {table_info}.
|
| 12 |
-
|
| 13 |
# Important Details
|
| 14 |
-
|
| 15 |
-
- For
|
| 16 |
- ONLY use LIMIT in your SQL queries if the user specifies a quantity (e.g., 'show me 5'). Otherwise, return all matching data without a limit.
|
| 17 |
- Wrap each column name in double quotes (") to denote them as delimited identifiers.
|
| 18 |
- Wrap values that are strings in single quotes (') to distinguish them from column names.
|
|
@@ -22,17 +43,40 @@ ONLY write SQL queries using the records and columns that exist in the table cal
|
|
| 22 |
## Example:
|
| 23 |
example_user: "What is most expensive protected site?"
|
| 24 |
example_assistant: {{"sql_query":
|
| 25 |
-
SELECT "fid", "Site_Name"
|
| 26 |
-
FROM
|
| 27 |
-
WHERE "Amount" = (SELECT MAX("Amount") FROM
|
| 28 |
"explanation":"I selected the site with the highest `Amount`.
|
| 29 |
}}
|
| 30 |
|
| 31 |
## Example:
|
| 32 |
example_user: "Which sites are owned, managed or sponsored by the Trust for Public Land?"
|
| 33 |
example_assistant: {{"sql_query":
|
| 34 |
-
SELECT DISTINCT "fid", "Site_Name", "
|
| 35 |
WHERE "Owner_Name" ILIKE '%Trust for Public Land%' OR "Manager_Name" ILIKE '%Trust for Public Land%' OR "Sponsor_Name" ILIKE '%Trust for Public Land%'
|
| 36 |
ORDER BY "Site_Name";
|
| 37 |
"explanation":"I selected all sites affiliated with the Trust for Public Land.
|
| 38 |
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are an expert in SQL and an assistant for mapping and analyzing the Trust for Public Land (TPL) data. You are provided with multiple tables and must identify which table(s) to use. Given an input question, create a syntactically correct {dialect} query to run, and then provide an explanation of how you answered the input question. Not every question will require SQL code; users may ask for more information about values and columns in the tables, which you can answer based on the information in this prompt. In these cases, your "sql_query" field should be empty.
|
| 2 |
+
|
| 3 |
+
ONLY write SQL queries using the records and columns that exist in the relevant table. You have access to these tables:
|
| 4 |
+
|
| 5 |
+
conservation_almanac:
|
| 6 |
+
- Definition: Protected areas database tracking public spending on land conservation.
|
| 7 |
+
- Schema: {conservation_almanac}
|
| 8 |
+
|
| 9 |
+
landvote:
|
| 10 |
+
- Definition: Tracks land conservation ballot measures.
|
| 11 |
+
- Schema: {landvote}
|
| 12 |
+
|
| 13 |
+
carbon:
|
| 14 |
+
- Definition: level of irrecoverable carbon
|
| 15 |
+
- Schema: {carbon}
|
| 16 |
+
|
| 17 |
+
mobi:
|
| 18 |
+
- Definition: species richness from NatureServe's Map of Biodiversity Importance (MOBI)
|
| 19 |
+
- Schema: {mobi}
|
| 20 |
+
|
| 21 |
+
svi:
|
| 22 |
+
- Definition: social vulnerability index; higher value indicates higher vulnerability. Make sure to only use svi values greater than 0.
|
| 23 |
+
- Schema: {svi}
|
| 24 |
+
|
| 25 |
|
| 26 |
For example:
|
| 27 |
{{
|
|
|
|
| 31 |
|
| 32 |
Ensure the response contains only this JSON object, with no additional text, formatting, or commentary.
|
| 33 |
|
|
|
|
|
|
|
| 34 |
# Important Details
|
| 35 |
+
- When joining tables, use the `h8` column.
|
| 36 |
+
- For visualization-related queries (e.g., "show me"), ALWAYS include "fid" and "Site_Name" in the results.
|
| 37 |
- ONLY use LIMIT in your SQL queries if the user specifies a quantity (e.g., 'show me 5'). Otherwise, return all matching data without a limit.
|
| 38 |
- Wrap each column name in double quotes (") to denote them as delimited identifiers.
|
| 39 |
- Wrap values that are strings in single quotes (') to distinguish them from column names.
|
|
|
|
| 43 |
## Example:
|
| 44 |
example_user: "What is most expensive protected site?"
|
| 45 |
example_assistant: {{"sql_query":
|
| 46 |
+
SELECT "fid", "Site_Name", "Amount"
|
| 47 |
+
FROM conservation_almanac
|
| 48 |
+
WHERE "Amount" = (SELECT MAX("Amount") FROM conservation_almanac);
|
| 49 |
"explanation":"I selected the site with the highest `Amount`.
|
| 50 |
}}
|
| 51 |
|
| 52 |
## Example:
|
| 53 |
example_user: "Which sites are owned, managed or sponsored by the Trust for Public Land?"
|
| 54 |
example_assistant: {{"sql_query":
|
| 55 |
+
SELECT DISTINCT "fid", "Site_Name", "Owner_Name", "Manager_Name", "Sponsor_Name" FROM conservation_almanac
|
| 56 |
WHERE "Owner_Name" ILIKE '%Trust for Public Land%' OR "Manager_Name" ILIKE '%Trust for Public Land%' OR "Sponsor_Name" ILIKE '%Trust for Public Land%'
|
| 57 |
ORDER BY "Site_Name";
|
| 58 |
"explanation":"I selected all sites affiliated with the Trust for Public Land.
|
| 59 |
}}
|
| 60 |
+
|
| 61 |
+
## Example:
|
| 62 |
+
example_user: "Show me protected areas with high levels of carbon"
|
| 63 |
+
example_assistant: {{"sql_query":
|
| 64 |
+
SELECT fid, AVG(carbon) as mean_carbon
|
| 65 |
+
FROM conservation_almanac
|
| 66 |
+
LEFT JOIN carbon
|
| 67 |
+
USING (h8)
|
| 68 |
+
GROUP BY fid
|
| 69 |
+
ORDER BY mean_carbon DESC LIMIT 10;
|
| 70 |
+
"explanation":"I joined `conservation_almanac` with `carbon` to retrieve irrecoverable carbon levels for protected areas in the `conservation_almanac`. I returned 10 areas with highest levels of carbon.
|
| 71 |
+
}}
|
| 72 |
+
|
| 73 |
+
example_user: "Show me protected areas that are in socially vulnerable areas"
|
| 74 |
+
example_assistant: {{"sql_query":
|
| 75 |
+
SELECT fid, AVG(carbon) as mean_carbon
|
| 76 |
+
FROM conservation_almanac
|
| 77 |
+
LEFT JOIN carbon
|
| 78 |
+
USING (h8)
|
| 79 |
+
GROUP BY fid
|
| 80 |
+
ORDER BY mean_carbon DESC LIMIT 10;
|
| 81 |
+
"explanation":"I joined `conservation_almanac` with `carbon` to retrieve irrecoverable carbon levels for protected areas in the `conservation_almanac`. I returned 10 areas with highest levels of carbon.
|
| 82 |
+
}}
|
app/utils.py
CHANGED
|
@@ -5,22 +5,23 @@ import altair as alt
|
|
| 5 |
import re
|
| 6 |
|
| 7 |
def get_counties(state_selection):
|
|
|
|
|
|
|
| 8 |
if state_selection != 'All':
|
| 9 |
-
counties =
|
| 10 |
counties = ['All'] + counties['county'].tolist()
|
| 11 |
else:
|
| 12 |
counties = None
|
| 13 |
return counties
|
| 14 |
|
| 15 |
def filter_data(table, state_choice, county_choice, year_range):
|
|
|
|
| 16 |
min_year, max_year = year_range
|
| 17 |
gdf = (table.filter(_.year>=(min_year))
|
| 18 |
.filter(_.year<=(max_year))
|
| 19 |
)
|
| 20 |
if state_choice != "All":
|
| 21 |
-
|
| 22 |
-
gdf = gdf.filter(_.state_name == state_choice)
|
| 23 |
-
|
| 24 |
if (county_choice != "All") and (county_choice):
|
| 25 |
county_choice = re.sub(r"(?i)\s*(County)\b", "", county_choice)
|
| 26 |
gdf = gdf.filter(_.county == county_choice)
|
|
@@ -34,8 +35,8 @@ def group_data(table, style_choice):
|
|
| 34 |
|
| 35 |
def fit_bounds(state_choice, county_choice, m):
|
| 36 |
if state_choice != "All":
|
| 37 |
-
# gdf = county_bounds.filter(_.
|
| 38 |
-
gdf = county_bounds.filter(_.
|
| 39 |
|
| 40 |
if (county_choice != "All") and (county_choice):
|
| 41 |
gdf = gdf.filter(_.county == county_choice)
|
|
@@ -105,7 +106,8 @@ def tpl_style(ids, paint):
|
|
| 105 |
"source": "tpl",
|
| 106 |
"source-layer": source_layer_name,
|
| 107 |
"type": "fill",
|
| 108 |
-
'filter': [
|
|
|
|
| 109 |
"paint": {
|
| 110 |
"fill-color": paint,
|
| 111 |
"fill-opacity": 1
|
|
@@ -129,28 +131,26 @@ def get_legend(paint):
|
|
| 129 |
|
| 130 |
@st.cache_data
|
| 131 |
def tpl_summary(_df):
|
| 132 |
-
summary = _df.group_by(_.
|
| 133 |
-
public_dollars = round( summary.filter(_.
|
| 134 |
-
private_dollars = round( summary.filter(_.
|
| 135 |
-
|
| 136 |
-
# tribal_dollars = tribal_dollars if tribal_dollars else round(tribal_dollars)
|
| 137 |
-
total_dollars = round( summary.agg(total = _.Amount.sum()).to_pandas().values[0][0] )
|
| 138 |
return public_dollars, private_dollars, total_dollars
|
| 139 |
|
| 140 |
# @st.cache_data
|
| 141 |
def calc_delta(_df):
|
| 142 |
deltas = (_df
|
| 143 |
-
.group_by(_.
|
| 144 |
-
.agg(
|
| 145 |
-
.mutate(total = _.
|
| 146 |
.mutate(lag = _.total.lag(1))
|
| 147 |
.mutate(delta = (100*(_.total - _.lag) / _.total).round(2) )
|
| 148 |
# .filter(_.year >=2019)
|
| 149 |
-
.select(_.
|
| 150 |
)
|
| 151 |
-
public_delta = deltas.filter(_.
|
| 152 |
public_delta = 0 if public_delta.empty else public_delta.delta[-1]
|
| 153 |
-
private_delta = deltas.filter(_.
|
| 154 |
private_delta = 0 if private_delta.empty else private_delta.delta[-1]
|
| 155 |
return public_delta, private_delta
|
| 156 |
|
|
@@ -180,9 +180,9 @@ def bar(area_totals, column, paint):
|
|
| 180 |
# .filter(~_.year.isnull())
|
| 181 |
# .filter(_.year > 0)
|
| 182 |
# .group_by([_.year, _[column]])
|
| 183 |
-
# .agg(
|
| 184 |
# .mutate(year = _.year.cast("int"),
|
| 185 |
-
#
|
| 186 |
|
| 187 |
# .to_pandas()
|
| 188 |
# )
|
|
@@ -196,7 +196,7 @@ def chart_time(timeseries, column, paint):
|
|
| 196 |
# use the colors
|
| 197 |
plt = alt.Chart(timeseries).mark_line().encode(
|
| 198 |
x='year:O',
|
| 199 |
-
y = alt.Y('
|
| 200 |
color=alt.Color(column,scale= alt.Scale(domain=domain, range=range_))
|
| 201 |
).properties(height=350)
|
| 202 |
return plt
|
|
|
|
| 5 |
import re
|
| 6 |
|
| 7 |
def get_counties(state_selection):
|
| 8 |
+
tpl_table.head().execute()
|
| 9 |
+
|
| 10 |
if state_selection != 'All':
|
| 11 |
+
counties = tpl_table.filter(_.state == state_selection).select('county').distinct().order_by('county').execute()
|
| 12 |
counties = ['All'] + counties['county'].tolist()
|
| 13 |
else:
|
| 14 |
counties = None
|
| 15 |
return counties
|
| 16 |
|
| 17 |
def filter_data(table, state_choice, county_choice, year_range):
|
| 18 |
+
|
| 19 |
min_year, max_year = year_range
|
| 20 |
gdf = (table.filter(_.year>=(min_year))
|
| 21 |
.filter(_.year<=(max_year))
|
| 22 |
)
|
| 23 |
if state_choice != "All":
|
| 24 |
+
gdf = gdf.filter(_.state == state_choice)
|
|
|
|
|
|
|
| 25 |
if (county_choice != "All") and (county_choice):
|
| 26 |
county_choice = re.sub(r"(?i)\s*(County)\b", "", county_choice)
|
| 27 |
gdf = gdf.filter(_.county == county_choice)
|
|
|
|
| 35 |
|
| 36 |
def fit_bounds(state_choice, county_choice, m):
|
| 37 |
if state_choice != "All":
|
| 38 |
+
# gdf = county_bounds.filter(_.state.isin(state_choice))
|
| 39 |
+
gdf = county_bounds.filter(_.state == state_choice)
|
| 40 |
|
| 41 |
if (county_choice != "All") and (county_choice):
|
| 42 |
gdf = gdf.filter(_.county == county_choice)
|
|
|
|
| 106 |
"source": "tpl",
|
| 107 |
"source-layer": source_layer_name,
|
| 108 |
"type": "fill",
|
| 109 |
+
'filter': ["match", ["get", 'fid'], ids, True, False],
|
| 110 |
+
# 'filter': ['in', ['get', 'fid'], ["literal", ids]],
|
| 111 |
"paint": {
|
| 112 |
"fill-color": paint,
|
| 113 |
"fill-opacity": 1
|
|
|
|
| 131 |
|
| 132 |
@st.cache_data
|
| 133 |
def tpl_summary(_df):
|
| 134 |
+
summary = _df.group_by(_.manager_type).agg(amount = _.amount.sum())
|
| 135 |
+
public_dollars = round( summary.filter(_.manager_type.isin(["FED", "STAT", "LOC", "DIST"])).agg(total = _.amount.sum()).to_pandas().values[0][0] )
|
| 136 |
+
private_dollars = round( summary.filter(_.manager_type.isin(["PVT", "NGO"])).agg(total = _.amount.sum()).to_pandas().values[0][0] )
|
| 137 |
+
total_dollars = round( summary.agg(total = _.amount.sum()).to_pandas().values[0][0] )
|
|
|
|
|
|
|
| 138 |
return public_dollars, private_dollars, total_dollars
|
| 139 |
|
| 140 |
# @st.cache_data
|
| 141 |
def calc_delta(_df):
|
| 142 |
deltas = (_df
|
| 143 |
+
.group_by(_.manager_type, _.year)
|
| 144 |
+
.agg(amount = _.amount.sum())
|
| 145 |
+
.mutate(total = _.amount.cumsum(order_by=_.year, group_by=_.manager_type))
|
| 146 |
.mutate(lag = _.total.lag(1))
|
| 147 |
.mutate(delta = (100*(_.total - _.lag) / _.total).round(2) )
|
| 148 |
# .filter(_.year >=2019)
|
| 149 |
+
.select(_.manager_type, _.year, _.total, _.lag, _.delta)
|
| 150 |
)
|
| 151 |
+
public_delta = deltas.filter(_.manager_type.isin(["FED", "STAT", "LOC", "DIST"])).to_pandas()
|
| 152 |
public_delta = 0 if public_delta.empty else public_delta.delta[-1]
|
| 153 |
+
private_delta = deltas.filter(_.manager_type.isin(["PVT", "NGO"])).to_pandas()
|
| 154 |
private_delta = 0 if private_delta.empty else private_delta.delta[-1]
|
| 155 |
return public_delta, private_delta
|
| 156 |
|
|
|
|
| 180 |
# .filter(~_.year.isnull())
|
| 181 |
# .filter(_.year > 0)
|
| 182 |
# .group_by([_.year, _[column]])
|
| 183 |
+
# .agg(amount = _.amount.sum())
|
| 184 |
# .mutate(year = _.year.cast("int"),
|
| 185 |
+
# amount = _.amount.cumsum(group_by=_[column], order_by=_.year))
|
| 186 |
|
| 187 |
# .to_pandas()
|
| 188 |
# )
|
|
|
|
| 196 |
# use the colors
|
| 197 |
plt = alt.Chart(timeseries).mark_line().encode(
|
| 198 |
x='year:O',
|
| 199 |
+
y = alt.Y('amount:Q'),
|
| 200 |
color=alt.Color(column,scale= alt.Scale(domain=domain, range=range_))
|
| 201 |
).properties(height=350)
|
| 202 |
return plt
|
app/variables.py
CHANGED
|
@@ -10,8 +10,8 @@ from datetime import timedelta
|
|
| 10 |
import re
|
| 11 |
duckdb_install_h3()
|
| 12 |
|
| 13 |
-
# con = ibis.duckdb.connect("duck.db",extensions = ["spatial", "h3"])
|
| 14 |
con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
|
|
|
|
| 15 |
set_secrets(con)
|
| 16 |
|
| 17 |
# Get signed URLs to access license-controlled layers
|
|
@@ -19,72 +19,30 @@ key = st.secrets["MINIO_KEY"]
|
|
| 19 |
secret = st.secrets["MINIO_SECRET"]
|
| 20 |
client = Minio("minio.carlboettiger.info", key, secret)
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
tpl_table = con.read_parquet(tpl_geom_url).mutate(geom = _.geom.convert("ESRI:102039", "EPSG:4326")).rename(year = 'Close_Year', state_name = 'State', county = 'County')
|
| 28 |
|
| 29 |
county_bounds = con.read_parquet("https://minio.carlboettiger.info/public-census/2024/county/2024_us_county.parquet")
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
.rename(FIPS_county = "FIPS", measure_amount = 'Conservation Funds Approved',
|
| 33 |
-
measure_status = "Status", measure_purpose = "Purpose",)
|
| 34 |
-
.mutate(measure_year = _.Date.year()).drop('Date','geom'))
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
landvote_table = (con.read_parquet("s3://shared-tpl/landvote_geom.parquet")
|
| 38 |
-
.rename(FIPS_county = "FIPS", measure_amount = 'Conservation Funds Approved',
|
| 39 |
-
measure_status = "Status", measure_purpose = "Purpose")
|
| 40 |
-
.mutate(year = _.Date.year()).drop('Date'))
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
tpl_drop_cols = ['Reported_Acres','Close_Date','EasementHolder_Name',
|
| 44 |
-
'Data_Provider','Data_Source','Data_Aggregator',
|
| 45 |
-
'Program_ID','Sponsor_ID']
|
| 46 |
-
tpl_z8_url = "s3://shared-tpl/tpl_h3_z8.parquet"
|
| 47 |
-
tpl_z8 = con.read_parquet(tpl_z8_url).mutate(h8 = _.h8.lower()).drop(tpl_drop_cols)
|
| 48 |
-
|
| 49 |
-
select_cols = ['fid','TPL_ID','landvote_id',
|
| 50 |
-
'state','state_name','county',
|
| 51 |
-
'FIPS_county',
|
| 52 |
-
'city','jurisdiction',
|
| 53 |
-
'Close_Year', 'Site_Name',
|
| 54 |
-
'Owner_Name','Owner_Type',
|
| 55 |
-
'Manager_Name','Manager_Type',
|
| 56 |
-
'Purchase_Type','EasementHolder_Type',
|
| 57 |
-
'Public_Access_Type','Purpose_Type',
|
| 58 |
-
'Duration_Type','Amount',
|
| 59 |
-
'Program_Name','Sponsor_Name',
|
| 60 |
-
'Sponsor_Type','measure_year',
|
| 61 |
-
'measure_status','measure_purpose',
|
| 62 |
-
'measure_amount',
|
| 63 |
-
# 'carbon',
|
| 64 |
-
'richness','svi',
|
| 65 |
-
'h8']
|
| 66 |
-
|
| 67 |
-
database = (
|
| 68 |
-
tpl_z8.drop('State','County')
|
| 69 |
-
.left_join(landvote_z8, "h8").drop('h8_right')
|
| 70 |
-
.left_join(svi, "h8").drop('h8_right')
|
| 71 |
-
.left_join(mobi, "h8").drop('h8_right')
|
| 72 |
-
# .left_join(carbon, "h8").drop('h8_right')
|
| 73 |
-
).select(select_cols).distinct()
|
| 74 |
-
|
| 75 |
-
database_geom = (database.drop('h8').distinct().inner_join(tpl_table.select('geom','TPL_ID','fid','Shape_Area'), [database.fid == tpl_table.fid])
|
| 76 |
-
.mutate(acres = _.Shape_Area*0.0002471054)
|
| 77 |
-
)
|
| 78 |
|
| 79 |
pmtiles = client.get_presigned_url(
|
| 80 |
"GET",
|
| 81 |
"shared-tpl",
|
| 82 |
-
"
|
| 83 |
expires=timedelta(hours=2),
|
| 84 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
source_layer_name = '
|
| 87 |
-
# source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(pmtiles))[0]) #stripping hyphens to get layer name
|
| 88 |
|
| 89 |
states = (
|
| 90 |
"All", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
|
|
@@ -116,7 +74,7 @@ style_options = {
|
|
| 116 |
"Acquisition Cost":
|
| 117 |
["interpolate",
|
| 118 |
['exponential', 1],
|
| 119 |
-
["get", "
|
| 120 |
0, "#fde725",
|
| 121 |
36000, "#b4de2c",
|
| 122 |
93000, "#6ccd59",
|
|
@@ -130,7 +88,7 @@ style_options = {
|
|
| 130 |
]
|
| 131 |
,
|
| 132 |
"Manager Type": {
|
| 133 |
-
'property': '
|
| 134 |
'type': 'categorical',
|
| 135 |
'stops': [
|
| 136 |
['FED', darkblue],
|
|
@@ -145,7 +103,7 @@ style_options = {
|
|
| 145 |
]
|
| 146 |
},
|
| 147 |
"Access": {
|
| 148 |
-
'property': '
|
| 149 |
'type': 'categorical',
|
| 150 |
'stops': [
|
| 151 |
['OA', green],
|
|
@@ -155,7 +113,7 @@ style_options = {
|
|
| 155 |
]
|
| 156 |
},
|
| 157 |
"Purpose": {
|
| 158 |
-
'property': '
|
| 159 |
'type': 'categorical',
|
| 160 |
'stops': [
|
| 161 |
['FOR', green],
|
|
@@ -174,12 +132,10 @@ style_options = {
|
|
| 174 |
style_choice_columns = {'Manager Type': style_options['Manager Type']['property'],
|
| 175 |
'Access' : style_options['Access']['property'],
|
| 176 |
'Purpose': style_options['Purpose']['property'],
|
| 177 |
-
'Acquisition Cost': '
|
| 178 |
-
'Measure Cost': '
|
| 179 |
}
|
| 180 |
|
| 181 |
-
# metric_columns = {'svi': 'svi', 'mobi': 'richness', 'landvote':'measure_status'}
|
| 182 |
-
|
| 183 |
from langchain_openai import ChatOpenAI
|
| 184 |
import streamlit as st
|
| 185 |
|
|
|
|
| 10 |
import re
|
| 11 |
duckdb_install_h3()
|
| 12 |
|
|
|
|
| 13 |
con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
|
| 14 |
+
con.raw_sql("SET THREADS=100;")
|
| 15 |
set_secrets(con)
|
| 16 |
|
| 17 |
# Get signed URLs to access license-controlled layers
|
|
|
|
| 19 |
secret = st.secrets["MINIO_SECRET"]
|
| 20 |
client = Minio("minio.carlboettiger.info", key, secret)
|
| 21 |
|
| 22 |
+
tpl_z8 = con.read_parquet("s3://shared-tpl/conservation_almanac/z8/tpl_h3_z8.parquet", table_name = 'conservation_almanac')
|
| 23 |
+
landvote_z8 = con.read_parquet("s3://shared-tpl/landvote/z8/landvote_h3_z8.parquet", table_name = 'landvote')
|
| 24 |
+
mobi_z8 = con.read_parquet("https://minio.carlboettiger.info/public-mobi/hex/all-richness-h8.parquet", table_name = 'mobi')
|
| 25 |
+
svi_z8 = con.read_parquet("https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract_h3_z8.parquet",table_name = 'svi')
|
| 26 |
+
carbon_z8 = con.read_parquet("https://minio.carlboettiger.info/public-carbon/hex/us-tracts-vuln-total-carbon-2018-h8.parquet",table_name = 'carbon')
|
|
|
|
| 27 |
|
| 28 |
county_bounds = con.read_parquet("https://minio.carlboettiger.info/public-census/2024/county/2024_us_county.parquet")
|
| 29 |
+
landvote_table = con.read_parquet("s3://shared-tpl/landvote/landvote_geom.parquet")
|
| 30 |
+
tpl_table = con.read_parquet('s3://shared-tpl/conservation_almanac/tpl.parquet')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
pmtiles = client.get_presigned_url(
|
| 33 |
"GET",
|
| 34 |
"shared-tpl",
|
| 35 |
+
"conservation_almanac/tpl.pmtiles",
|
| 36 |
expires=timedelta(hours=2),
|
| 37 |
)
|
| 38 |
+
# pmtiles = client.get_presigned_url(
|
| 39 |
+
# "GET",
|
| 40 |
+
# "shared-tpl",
|
| 41 |
+
# "tpl_v2.pmtiles",
|
| 42 |
+
# expires=timedelta(hours=2),
|
| 43 |
+
# )
|
| 44 |
|
| 45 |
+
source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(pmtiles))[0]) #stripping hyphens to get layer name
|
|
|
|
| 46 |
|
| 47 |
states = (
|
| 48 |
"All", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
|
|
|
|
| 74 |
"Acquisition Cost":
|
| 75 |
["interpolate",
|
| 76 |
['exponential', 1],
|
| 77 |
+
["get", "amount"],
|
| 78 |
0, "#fde725",
|
| 79 |
36000, "#b4de2c",
|
| 80 |
93000, "#6ccd59",
|
|
|
|
| 88 |
]
|
| 89 |
,
|
| 90 |
"Manager Type": {
|
| 91 |
+
'property': 'manager_type',
|
| 92 |
'type': 'categorical',
|
| 93 |
'stops': [
|
| 94 |
['FED', darkblue],
|
|
|
|
| 103 |
]
|
| 104 |
},
|
| 105 |
"Access": {
|
| 106 |
+
'property': 'access_type',
|
| 107 |
'type': 'categorical',
|
| 108 |
'stops': [
|
| 109 |
['OA', green],
|
|
|
|
| 113 |
]
|
| 114 |
},
|
| 115 |
"Purpose": {
|
| 116 |
+
'property': 'purpose_type',
|
| 117 |
'type': 'categorical',
|
| 118 |
'stops': [
|
| 119 |
['FOR', green],
|
|
|
|
| 132 |
style_choice_columns = {'Manager Type': style_options['Manager Type']['property'],
|
| 133 |
'Access' : style_options['Access']['property'],
|
| 134 |
'Purpose': style_options['Purpose']['property'],
|
| 135 |
+
'Acquisition Cost': 'amount',
|
| 136 |
+
'Measure Cost': 'conservation_funds_approved',
|
| 137 |
}
|
| 138 |
|
|
|
|
|
|
|
| 139 |
from langchain_openai import ChatOpenAI
|
| 140 |
import streamlit as st
|
| 141 |
|