cassiebuhler committed on
Commit
9adb68a
·
1 Parent(s): 08ae6d1

let's see if this works...

Browse files

chatbot has multiple datasets now. Also, pmtiles only shows up in notebook-- not sure if this will work

Files changed (5) hide show
  1. .gitignore +5 -0
  2. app/app.py +24 -17
  3. app/system_prompt.txt +53 -9
  4. app/utils.py +22 -22
  5. app/variables.py +22 -66
.gitignore CHANGED
@@ -9,3 +9,8 @@
9
  *.vrt
10
  .streamlit
11
  __pycache__
 
 
 
 
 
 
9
  *.vrt
10
  .streamlit
11
  __pycache__
12
+ *.block
13
+ *.tmp
14
+ *.parquet
15
+ *.geojson
16
+ **/*.shp
app/app.py CHANGED
@@ -4,13 +4,28 @@ from cng.h3 import *
4
  from utils import *
5
  from ibis import _
6
 
7
- current_tables = con.list_tables()
8
 
9
- if "mydata" not in set(current_tables):
10
- con.create_table("mydata", database_geom)
11
 
12
- chatbot_data = con.table("mydata")
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  st.set_page_config(layout="wide",
16
  page_title="TPL Conservation Almanac",
@@ -25,7 +40,6 @@ basemaps = leafmap.basemaps.keys()
25
 
26
  m = leafmap.Map(style = "positron")
27
 
28
-
29
  from datetime import time
30
 
31
  with st.sidebar:
@@ -93,7 +107,9 @@ from langchain_core.prompts import ChatPromptTemplate
93
  prompt = ChatPromptTemplate.from_messages([
94
  ("system", template),
95
  ("human", "{input}")
96
- ]).partial(dialect="duckdb", table_info = chatbot_data.schema())
 
 
97
 
98
  # chatbot_toggles = {key: False for key in keys}
99
  structured_llm = llm.with_structured_output(SQLResponse)
@@ -115,7 +131,7 @@ def run_sql(query,paint):
115
  st.success(explanation)
116
  return pd.DataFrame({'fid' : []})
117
 
118
- result = chatbot_data.sql(sql_query).distinct().execute()
119
  if result.empty :
120
  explanation = "This query did not return any results. Please try again with a different query."
121
  st.warning(explanation, icon="⚠️")
@@ -127,9 +143,6 @@ def run_sql(query,paint):
127
  return result
128
  elif ("fid" and "geom" in result.columns):
129
  style = tpl_style(result["fid"].tolist(), paint)
130
- # legend, position, bg_color, fontsize = get_legend(paint)
131
-
132
- # m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
133
  m.add_pmtiles(pmtiles, style=style, opacity=0.5, tooltip=True, fit_bounds=True)
134
  m.fit_bounds(result.total_bounds.tolist())
135
  result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
@@ -180,14 +193,8 @@ if 'out' not in locals():
180
  m.add_pmtiles(pmtiles, style=tpl_style(unique_ids, paint), opacity=0.5, tooltip=True, fit_bounds=True)
181
  else:
182
  m.add_pmtiles(pmtiles, style=tpl_style_default(paint), opacity=0.5, tooltip=True, fit_bounds=True)
183
-
184
- # legend, position, bg_color, fontsize = get_legend(paint)
185
- # m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
186
- #zoom to state(s)
187
  fit_bounds(state_choice, county_choice, m)
188
 
189
- ## Render display panels
190
- #
191
 
192
  m.to_streamlit()
193
  with st.expander("🔍 View/download data"): # adding data table
@@ -216,7 +223,7 @@ with col1:
216
  get_bar(gdf_tpl, style_choice, 'year', 'total_amount', paint,'Year','Acquisition Cost ($)',"Yearly investment ($) in protected area")
217
 
218
  with col2:
219
- gdf_landvote = group_data(gdf_landvote.filter(_.measure_status == 'Pass'), 'Measure Cost')
220
  get_bar(gdf_landvote, style_choice, 'year', 'total_amount', paint, 'Year','Funds Approved ($)','Yearly funds from conservation ballot measures')
221
 
222
  st.divider()
 
4
  from utils import *
5
  from ibis import _
6
 
7
+ # current_tables = con.list_tables()
8
 
9
+ # if "conservation_almanac" not in set(current_tables):
10
+ # con.create_table("conservation_almanac", tpl_z8)
11
 
12
+ # if "landvote" not in set(current_tables):
13
+ # con.create_table("landvote", landvote_z8)
14
 
15
+ # if "carbon" not in set(current_tables):
16
+ # con.create_table("carbon", carbon_z8)
17
+
18
+ # if "mobi" not in set(current_tables):
19
+ # con.create_table("mobi", mobi_z8)
20
+
21
+ # if "svi" not in set(current_tables):
22
+ # con.create_table("svi", svi_z8)
23
+
24
+ # conservation_almanac = con.table("conservation_almanac")
25
+ # landvote = con.table("landvote")
26
+ # carbon = con.table("carbon")
27
+ # mobi = con.table("mobi")
28
+ # svi = con.table("svi")
29
 
30
  st.set_page_config(layout="wide",
31
  page_title="TPL Conservation Almanac",
 
40
 
41
  m = leafmap.Map(style = "positron")
42
 
 
43
  from datetime import time
44
 
45
  with st.sidebar:
 
107
  prompt = ChatPromptTemplate.from_messages([
108
  ("system", template),
109
  ("human", "{input}")
110
+ ]).partial(dialect="duckdb", conservation_almanac = tpl_z8.schema(),
111
+ landvote = landvote_z8.schema(), carbon = carbon_z8.schema(),
112
+ svi = svi_z8.schema(), mobi = mobi_z8.schema())
113
 
114
  # chatbot_toggles = {key: False for key in keys}
115
  structured_llm = llm.with_structured_output(SQLResponse)
 
131
  st.success(explanation)
132
  return pd.DataFrame({'fid' : []})
133
 
134
+ result = con.sql(sql_query).distinct().execute()
135
  if result.empty :
136
  explanation = "This query did not return any results. Please try again with a different query."
137
  st.warning(explanation, icon="⚠️")
 
143
  return result
144
  elif ("fid" and "geom" in result.columns):
145
  style = tpl_style(result["fid"].tolist(), paint)
 
 
 
146
  m.add_pmtiles(pmtiles, style=style, opacity=0.5, tooltip=True, fit_bounds=True)
147
  m.fit_bounds(result.total_bounds.tolist())
148
  result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
 
193
  m.add_pmtiles(pmtiles, style=tpl_style(unique_ids, paint), opacity=0.5, tooltip=True, fit_bounds=True)
194
  else:
195
  m.add_pmtiles(pmtiles, style=tpl_style_default(paint), opacity=0.5, tooltip=True, fit_bounds=True)
 
 
 
 
196
  fit_bounds(state_choice, county_choice, m)
197
 
 
 
198
 
199
  m.to_streamlit()
200
  with st.expander("🔍 View/download data"): # adding data table
 
223
  get_bar(gdf_tpl, style_choice, 'year', 'total_amount', paint,'Year','Acquisition Cost ($)',"Yearly investment ($) in protected area")
224
 
225
  with col2:
226
+ gdf_landvote = group_data(gdf_landvote.filter(_.status == 'Pass'), 'Measure Cost')
227
  get_bar(gdf_landvote, style_choice, 'year', 'total_amount', paint, 'Year','Funds Approved ($)','Yearly funds from conservation ballot measures')
228
 
229
  st.divider()
app/system_prompt.txt CHANGED
@@ -1,4 +1,27 @@
1
- You are an expert in SQL and an assistant for mapping and analyzing the Trust for Public Land (TPL) data. The dataset is synthesized from TPL Conservation Alamanc, which track the impacts of public spending on land conservation, and TPL Landvote, a comprehensive database of conservation ballot measures. Given an input question, create a syntactically correct {dialect} query to run, and then provide an explanation of how you answered the input question. If the question doesn't necessitate a SQL query, only output an explanation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  For example:
4
  {{
@@ -8,11 +31,9 @@ For example:
8
 
9
  Ensure the response contains only this JSON object, with no additional text, formatting, or commentary.
10
 
11
- ONLY write SQL queries using the records and columns that exist in the table called mydata. This is the schema for mydata: {table_info}.
12
-
13
  # Important Details
14
- - Not every query will require SQL code, users may ask more information about values and columns in the table which you can answer based on the information in this prompt. For these cases, your "sql_query" field should be empty.
15
- - For map-related queries (e.g., "show me"), ALWAYS include "fid", "Site_Name" ,"geom", and "acres" in the results,
16
  - ONLY use LIMIT in your SQL queries if the user specifies a quantity (e.g., 'show me 5'). Otherwise, return all matching data without a limit.
17
  - Wrap each column name in double quotes (") to denote them as delimited identifiers.
18
  - Wrap values that are strings in single quotes (') to distinguish them from column names.
@@ -22,17 +43,40 @@ ONLY write SQL queries using the records and columns that exist in the table cal
22
  ## Example:
23
  example_user: "What is most expensive protected site?"
24
  example_assistant: {{"sql_query":
25
- SELECT "fid", "Site_Name" ,"geom", "acres", "Amount"
26
- FROM mydata
27
- WHERE "Amount" = (SELECT MAX("Amount") FROM mydata);
28
  "explanation":"I selected the site with the highest `Amount`.
29
  }}
30
 
31
  ## Example:
32
  example_user: "Which sites are owned, managed or sponsored by the Trust for Public Land?"
33
  example_assistant: {{"sql_query":
34
- SELECT DISTINCT "fid", "Site_Name", "geom", "acres", "Owner_Name", "Manager_Name", "Sponsor_Name" FROM mydata
35
  WHERE "Owner_Name" ILIKE '%Trust for Public Land%' OR "Manager_Name" ILIKE '%Trust for Public Land%' OR "Sponsor_Name" ILIKE '%Trust for Public Land%'
36
  ORDER BY "Site_Name";
37
  "explanation":"I selected all sites affiliated with the Trust for Public Land.
38
  }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are an expert in SQL and an assistant for mapping and analyzing the Trust for Public Land (TPL) data. You are provided with multiple tables and must identify which table(s) to use. Given an input question, create a syntactically correct {dialect} query to run, and then provide an explanation of how you answered the input question. Not every query will require SQL code; users may ask for more information about values and columns in the tables, which you can answer based on the information in this prompt. For these cases, your "sql_query" field should be empty.
2
+
3
+ ONLY write SQL queries using the records and columns that exist in the relevant table. You have access to these tables:
4
+
5
+ conservation_almanac:
6
+ - Definition: Protected areas database tracking public spending on land conservation.
7
+ - Schema: {conservation_almanac}
8
+
9
+ landvote:
10
+ - Definition: Tracks land conservation ballot measures.
11
+ - Schema: {landvote}
12
+
13
+ carbon:
14
+ - Definition: level of irrecoverable carbon
15
+ - Schema: {carbon}
16
+
17
+ mobi:
18
+ - Definition: species richness from the NatureServe's Map of Biodiversity Importance (MOBI)
19
+ - Schema: {mobi}
20
+
21
+ svi:
22
+ - Definition: social vulnerability index; higher value indicates higher vulnerability. Make sure to only use svi values greater than 0.
23
+ - Schema: {svi}
24
+
25
 
26
  For example:
27
  {{
 
31
 
32
  Ensure the response contains only this JSON object, with no additional text, formatting, or commentary.
33
 
 
 
34
  # Important Details
35
+ - When joining tables, use the `h8` column.
36
+ - For visualization-related queries (e.g., "show me"), ALWAYS include "fid" and "Site_Name" in the results.
37
  - ONLY use LIMIT in your SQL queries if the user specifies a quantity (e.g., 'show me 5'). Otherwise, return all matching data without a limit.
38
  - Wrap each column name in double quotes (") to denote them as delimited identifiers.
39
  - Wrap values that are strings in single quotes (') to distinguish them from column names.
 
43
  ## Example:
44
  example_user: "What is most expensive protected site?"
45
  example_assistant: {{"sql_query":
46
+ SELECT "fid", "Site_Name", "Amount"
47
+ FROM conservation_almanac
48
+ WHERE "Amount" = (SELECT MAX("Amount") FROM conservation_almanac);
49
  "explanation":"I selected the site with the highest `Amount`.
50
  }}
51
 
52
  ## Example:
53
  example_user: "Which sites are owned, managed or sponsored by the Trust for Public Land?"
54
  example_assistant: {{"sql_query":
55
+ SELECT DISTINCT "fid", "Site_Name", "Owner_Name", "Manager_Name", "Sponsor_Name" FROM conservation_almanac
56
  WHERE "Owner_Name" ILIKE '%Trust for Public Land%' OR "Manager_Name" ILIKE '%Trust for Public Land%' OR "Sponsor_Name" ILIKE '%Trust for Public Land%'
57
  ORDER BY "Site_Name";
58
  "explanation":"I selected all sites affiliated with the Trust for Public Land.
59
  }}
60
+
61
+ ## Example:
62
+ example_user: "Show me protected areas with high levels of carbon"
63
+ example_assistant: {{"sql_query":
64
+ SELECT fid, AVG(carbon) as mean_carbon
65
+ FROM conservation_almanac
66
+ LEFT JOIN carbon
67
+ USING (h8)
68
+ GROUP BY fid
69
+ ORDER BY mean_carbon DESC LIMIT 10;
70
+ "explanation":"I joined `conservation_almanac` with `carbon` to retrieve irrecoverable carbon levels for protected areas in the `conservation_almanac`. I returned 10 areas with highest levels of carbon.
71
+ }}
72
+
73
+ example_user: "Show me protected areas that are in socially vulnerable areas"
74
+ example_assistant: {{"sql_query":
75
+ SELECT fid, AVG(svi) as mean_svi
76
+ FROM conservation_almanac
77
+ LEFT JOIN svi USING (h8)
78
+ WHERE svi > 0
79
+ GROUP BY fid
80
+ ORDER BY mean_svi DESC LIMIT 10;
81
+ "explanation":"I joined `conservation_almanac` with `svi` to retrieve social vulnerability index values for protected areas in the `conservation_almanac`, keeping only svi values greater than 0. I returned the 10 areas with the highest mean social vulnerability."
82
+ }}
app/utils.py CHANGED
@@ -5,22 +5,23 @@ import altair as alt
5
  import re
6
 
7
  def get_counties(state_selection):
 
 
8
  if state_selection != 'All':
9
- counties = database.filter(_.state_name == state_selection).select('county').distinct().order_by('county').execute()
10
  counties = ['All'] + counties['county'].tolist()
11
  else:
12
  counties = None
13
  return counties
14
 
15
  def filter_data(table, state_choice, county_choice, year_range):
 
16
  min_year, max_year = year_range
17
  gdf = (table.filter(_.year>=(min_year))
18
  .filter(_.year<=(max_year))
19
  )
20
  if state_choice != "All":
21
- # gdf = gdf.filter(_.state_name.isin(state_choice))
22
- gdf = gdf.filter(_.state_name == state_choice)
23
-
24
  if (county_choice != "All") and (county_choice):
25
  county_choice = re.sub(r"(?i)\s*(County)\b", "", county_choice)
26
  gdf = gdf.filter(_.county == county_choice)
@@ -34,8 +35,8 @@ def group_data(table, style_choice):
34
 
35
  def fit_bounds(state_choice, county_choice, m):
36
  if state_choice != "All":
37
- # gdf = county_bounds.filter(_.state_name.isin(state_choice))
38
- gdf = county_bounds.filter(_.state_name == state_choice)
39
 
40
  if (county_choice != "All") and (county_choice):
41
  gdf = gdf.filter(_.county == county_choice)
@@ -105,7 +106,8 @@ def tpl_style(ids, paint):
105
  "source": "tpl",
106
  "source-layer": source_layer_name,
107
  "type": "fill",
108
- 'filter': ['in', ['get', 'fid'], ["literal", ids]],
 
109
  "paint": {
110
  "fill-color": paint,
111
  "fill-opacity": 1
@@ -129,28 +131,26 @@ def get_legend(paint):
129
 
130
  @st.cache_data
131
  def tpl_summary(_df):
132
- summary = _df.group_by(_.Manager_Type).agg(Amount = _.Amount.sum())
133
- public_dollars = round( summary.filter(_.Manager_Type.isin(["FED", "STAT", "LOC", "DIST"])).agg(total = _.Amount.sum()).to_pandas().values[0][0] )
134
- private_dollars = round( summary.filter(_.Manager_Type.isin(["PVT", "NGO"])).agg(total = _.Amount.sum()).to_pandas().values[0][0] )
135
- # tribal_dollars = summary.filter(_.Manager_Type.isin(["TRIB"])).agg(total = _.Amount.sum()).to_pandas().values[0][0]
136
- # tribal_dollars = tribal_dollars if tribal_dollars else round(tribal_dollars)
137
- total_dollars = round( summary.agg(total = _.Amount.sum()).to_pandas().values[0][0] )
138
  return public_dollars, private_dollars, total_dollars
139
 
140
  # @st.cache_data
141
  def calc_delta(_df):
142
  deltas = (_df
143
- .group_by(_.Manager_Type, _.year)
144
- .agg(Amount = _.Amount.sum())
145
- .mutate(total = _.Amount.cumsum(order_by=_.year, group_by=_.Manager_Type))
146
  .mutate(lag = _.total.lag(1))
147
  .mutate(delta = (100*(_.total - _.lag) / _.total).round(2) )
148
  # .filter(_.year >=2019)
149
- .select(_.Manager_Type, _.year, _.total, _.lag, _.delta)
150
  )
151
- public_delta = deltas.filter(_.Manager_Type.isin(["FED", "STAT", "LOC", "DIST"])).to_pandas()
152
  public_delta = 0 if public_delta.empty else public_delta.delta[-1]
153
- private_delta = deltas.filter(_.Manager_Type.isin(["PVT", "NGO"])).to_pandas()
154
  private_delta = 0 if private_delta.empty else private_delta.delta[-1]
155
  return public_delta, private_delta
156
 
@@ -180,9 +180,9 @@ def bar(area_totals, column, paint):
180
  # .filter(~_.year.isnull())
181
  # .filter(_.year > 0)
182
  # .group_by([_.year, _[column]])
183
- # .agg(Amount = _.Amount.sum())
184
  # .mutate(year = _.year.cast("int"),
185
- # Amount = _.Amount.cumsum(group_by=_[column], order_by=_.year))
186
 
187
  # .to_pandas()
188
  # )
@@ -196,7 +196,7 @@ def chart_time(timeseries, column, paint):
196
  # use the colors
197
  plt = alt.Chart(timeseries).mark_line().encode(
198
  x='year:O',
199
- y = alt.Y('Amount:Q'),
200
  color=alt.Color(column,scale= alt.Scale(domain=domain, range=range_))
201
  ).properties(height=350)
202
  return plt
 
5
  import re
6
 
7
  def get_counties(state_selection):
8
+ tpl_table.head().execute()
9
+
10
  if state_selection != 'All':
11
+ counties = tpl_table.filter(_.state == state_selection).select('county').distinct().order_by('county').execute()
12
  counties = ['All'] + counties['county'].tolist()
13
  else:
14
  counties = None
15
  return counties
16
 
17
  def filter_data(table, state_choice, county_choice, year_range):
18
+
19
  min_year, max_year = year_range
20
  gdf = (table.filter(_.year>=(min_year))
21
  .filter(_.year<=(max_year))
22
  )
23
  if state_choice != "All":
24
+ gdf = gdf.filter(_.state == state_choice)
 
 
25
  if (county_choice != "All") and (county_choice):
26
  county_choice = re.sub(r"(?i)\s*(County)\b", "", county_choice)
27
  gdf = gdf.filter(_.county == county_choice)
 
35
 
36
  def fit_bounds(state_choice, county_choice, m):
37
  if state_choice != "All":
38
+ # gdf = county_bounds.filter(_.state.isin(state_choice))
39
+ gdf = county_bounds.filter(_.state == state_choice)
40
 
41
  if (county_choice != "All") and (county_choice):
42
  gdf = gdf.filter(_.county == county_choice)
 
106
  "source": "tpl",
107
  "source-layer": source_layer_name,
108
  "type": "fill",
109
+ 'filter': ["match", ["get", 'fid'], ids, True, False],
110
+ # 'filter': ['in', ['get', 'fid'], ["literal", ids]],
111
  "paint": {
112
  "fill-color": paint,
113
  "fill-opacity": 1
 
131
 
132
  @st.cache_data
133
  def tpl_summary(_df):
134
+ summary = _df.group_by(_.manager_type).agg(amount = _.amount.sum())
135
+ public_dollars = round( summary.filter(_.manager_type.isin(["FED", "STAT", "LOC", "DIST"])).agg(total = _.amount.sum()).to_pandas().values[0][0] )
136
+ private_dollars = round( summary.filter(_.manager_type.isin(["PVT", "NGO"])).agg(total = _.amount.sum()).to_pandas().values[0][0] )
137
+ total_dollars = round( summary.agg(total = _.amount.sum()).to_pandas().values[0][0] )
 
 
138
  return public_dollars, private_dollars, total_dollars
139
 
140
  # @st.cache_data
141
  def calc_delta(_df):
142
  deltas = (_df
143
+ .group_by(_.manager_type, _.year)
144
+ .agg(amount = _.amount.sum())
145
+ .mutate(total = _.amount.cumsum(order_by=_.year, group_by=_.manager_type))
146
  .mutate(lag = _.total.lag(1))
147
  .mutate(delta = (100*(_.total - _.lag) / _.total).round(2) )
148
  # .filter(_.year >=2019)
149
+ .select(_.manager_type, _.year, _.total, _.lag, _.delta)
150
  )
151
+ public_delta = deltas.filter(_.manager_type.isin(["FED", "STAT", "LOC", "DIST"])).to_pandas()
152
  public_delta = 0 if public_delta.empty else public_delta.delta[-1]
153
+ private_delta = deltas.filter(_.manager_type.isin(["PVT", "NGO"])).to_pandas()
154
  private_delta = 0 if private_delta.empty else private_delta.delta[-1]
155
  return public_delta, private_delta
156
 
 
180
  # .filter(~_.year.isnull())
181
  # .filter(_.year > 0)
182
  # .group_by([_.year, _[column]])
183
+ # .agg(amount = _.amount.sum())
184
  # .mutate(year = _.year.cast("int"),
185
+ # amount = _.amount.cumsum(group_by=_[column], order_by=_.year))
186
 
187
  # .to_pandas()
188
  # )
 
196
  # use the colors
197
  plt = alt.Chart(timeseries).mark_line().encode(
198
  x='year:O',
199
+ y = alt.Y('amount:Q'),
200
  color=alt.Color(column,scale= alt.Scale(domain=domain, range=range_))
201
  ).properties(height=350)
202
  return plt
app/variables.py CHANGED
@@ -10,8 +10,8 @@ from datetime import timedelta
10
  import re
11
  duckdb_install_h3()
12
 
13
- # con = ibis.duckdb.connect("duck.db",extensions = ["spatial", "h3"])
14
  con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
 
15
  set_secrets(con)
16
 
17
  # Get signed URLs to access license-controlled layers
@@ -19,72 +19,30 @@ key = st.secrets["MINIO_KEY"]
19
  secret = st.secrets["MINIO_SECRET"]
20
  client = Minio("minio.carlboettiger.info", key, secret)
21
 
22
- mobi = con.read_parquet("https://minio.carlboettiger.info/public-mobi/hex/all-richness-h8.parquet").select("h8", "Z").rename(richness = "Z")
23
- svi = con.read_parquet("https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract_h3_z8.parquet").select("h8", "svi").filter(_.svi > 0)
24
- # carbon = con.read_parquet("https://minio.carlboettiger.info/public-carbon/hex/us-tracts-vuln-total-carbon-2018-h8.parquet").select('carbon','h8')
25
-
26
- tpl_geom_url = "s3://shared-tpl/tpl.parquet"
27
- tpl_table = con.read_parquet(tpl_geom_url).mutate(geom = _.geom.convert("ESRI:102039", "EPSG:4326")).rename(year = 'Close_Year', state_name = 'State', county = 'County')
28
 
29
  county_bounds = con.read_parquet("https://minio.carlboettiger.info/public-census/2024/county/2024_us_county.parquet")
30
-
31
- landvote_z8 = (con.read_parquet("s3://shared-tpl/landvote_h3_z8.parquet")
32
- .rename(FIPS_county = "FIPS", measure_amount = 'Conservation Funds Approved',
33
- measure_status = "Status", measure_purpose = "Purpose",)
34
- .mutate(measure_year = _.Date.year()).drop('Date','geom'))
35
-
36
-
37
- landvote_table = (con.read_parquet("s3://shared-tpl/landvote_geom.parquet")
38
- .rename(FIPS_county = "FIPS", measure_amount = 'Conservation Funds Approved',
39
- measure_status = "Status", measure_purpose = "Purpose")
40
- .mutate(year = _.Date.year()).drop('Date'))
41
-
42
-
43
- tpl_drop_cols = ['Reported_Acres','Close_Date','EasementHolder_Name',
44
- 'Data_Provider','Data_Source','Data_Aggregator',
45
- 'Program_ID','Sponsor_ID']
46
- tpl_z8_url = "s3://shared-tpl/tpl_h3_z8.parquet"
47
- tpl_z8 = con.read_parquet(tpl_z8_url).mutate(h8 = _.h8.lower()).drop(tpl_drop_cols)
48
-
49
- select_cols = ['fid','TPL_ID','landvote_id',
50
- 'state','state_name','county',
51
- 'FIPS_county',
52
- 'city','jurisdiction',
53
- 'Close_Year', 'Site_Name',
54
- 'Owner_Name','Owner_Type',
55
- 'Manager_Name','Manager_Type',
56
- 'Purchase_Type','EasementHolder_Type',
57
- 'Public_Access_Type','Purpose_Type',
58
- 'Duration_Type','Amount',
59
- 'Program_Name','Sponsor_Name',
60
- 'Sponsor_Type','measure_year',
61
- 'measure_status','measure_purpose',
62
- 'measure_amount',
63
- # 'carbon',
64
- 'richness','svi',
65
- 'h8']
66
-
67
- database = (
68
- tpl_z8.drop('State','County')
69
- .left_join(landvote_z8, "h8").drop('h8_right')
70
- .left_join(svi, "h8").drop('h8_right')
71
- .left_join(mobi, "h8").drop('h8_right')
72
- # .left_join(carbon, "h8").drop('h8_right')
73
- ).select(select_cols).distinct()
74
-
75
- database_geom = (database.drop('h8').distinct().inner_join(tpl_table.select('geom','TPL_ID','fid','Shape_Area'), [database.fid == tpl_table.fid])
76
- .mutate(acres = _.Shape_Area*0.0002471054)
77
- )
78
 
79
  pmtiles = client.get_presigned_url(
80
  "GET",
81
  "shared-tpl",
82
- "tpl_v2.pmtiles",
83
  expires=timedelta(hours=2),
84
  )
 
 
 
 
 
 
85
 
86
- source_layer_name = 'tpl'
87
- # source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(pmtiles))[0]) #stripping hyphens to get layer name
88
 
89
  states = (
90
  "All", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
@@ -116,7 +74,7 @@ style_options = {
116
  "Acquisition Cost":
117
  ["interpolate",
118
  ['exponential', 1],
119
- ["get", "Amount"],
120
  0, "#fde725",
121
  36000, "#b4de2c",
122
  93000, "#6ccd59",
@@ -130,7 +88,7 @@ style_options = {
130
  ]
131
  ,
132
  "Manager Type": {
133
- 'property': 'Manager_Type',
134
  'type': 'categorical',
135
  'stops': [
136
  ['FED', darkblue],
@@ -145,7 +103,7 @@ style_options = {
145
  ]
146
  },
147
  "Access": {
148
- 'property': 'Public_Access_Type',
149
  'type': 'categorical',
150
  'stops': [
151
  ['OA', green],
@@ -155,7 +113,7 @@ style_options = {
155
  ]
156
  },
157
  "Purpose": {
158
- 'property': 'Purpose_Type',
159
  'type': 'categorical',
160
  'stops': [
161
  ['FOR', green],
@@ -174,12 +132,10 @@ style_options = {
174
  style_choice_columns = {'Manager Type': style_options['Manager Type']['property'],
175
  'Access' : style_options['Access']['property'],
176
  'Purpose': style_options['Purpose']['property'],
177
- 'Acquisition Cost': 'Amount',
178
- 'Measure Cost': 'measure_amount',
179
  }
180
 
181
- # metric_columns = {'svi': 'svi', 'mobi': 'richness', 'landvote':'measure_status'}
182
-
183
  from langchain_openai import ChatOpenAI
184
  import streamlit as st
185
 
 
10
  import re
11
  duckdb_install_h3()
12
 
 
13
  con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
14
+ con.raw_sql("SET THREADS=100;")
15
  set_secrets(con)
16
 
17
  # Get signed URLs to access license-controlled layers
 
19
  secret = st.secrets["MINIO_SECRET"]
20
  client = Minio("minio.carlboettiger.info", key, secret)
21
 
22
+ tpl_z8 = con.read_parquet("s3://shared-tpl/conservation_almanac/z8/tpl_h3_z8.parquet", table_name = 'conservation_almanac')
23
+ landvote_z8 = con.read_parquet("s3://shared-tpl/landvote/z8/landvote_h3_z8.parquet", table_name = 'landvote')
24
+ mobi_z8 = con.read_parquet("https://minio.carlboettiger.info/public-mobi/hex/all-richness-h8.parquet", table_name = 'mobi')
25
+ svi_z8 = con.read_parquet("https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract_h3_z8.parquet",table_name = 'svi')
26
+ carbon_z8 = con.read_parquet("https://minio.carlboettiger.info/public-carbon/hex/us-tracts-vuln-total-carbon-2018-h8.parquet",table_name = 'carbon')
 
27
 
28
  county_bounds = con.read_parquet("https://minio.carlboettiger.info/public-census/2024/county/2024_us_county.parquet")
29
+ landvote_table = con.read_parquet("s3://shared-tpl/landvote/landvote_geom.parquet")
30
+ tpl_table = con.read_parquet('s3://shared-tpl/conservation_almanac/tpl.parquet')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  pmtiles = client.get_presigned_url(
33
  "GET",
34
  "shared-tpl",
35
+ "conservation_almanac/tpl.pmtiles",
36
  expires=timedelta(hours=2),
37
  )
38
+ # pmtiles = client.get_presigned_url(
39
+ # "GET",
40
+ # "shared-tpl",
41
+ # "tpl_v2.pmtiles",
42
+ # expires=timedelta(hours=2),
43
+ # )
44
 
45
+ source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(pmtiles))[0]) #stripping hyphens to get layer name
 
46
 
47
  states = (
48
  "All", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
 
74
  "Acquisition Cost":
75
  ["interpolate",
76
  ['exponential', 1],
77
+ ["get", "amount"],
78
  0, "#fde725",
79
  36000, "#b4de2c",
80
  93000, "#6ccd59",
 
88
  ]
89
  ,
90
  "Manager Type": {
91
+ 'property': 'manager_type',
92
  'type': 'categorical',
93
  'stops': [
94
  ['FED', darkblue],
 
103
  ]
104
  },
105
  "Access": {
106
+ 'property': 'access_type',
107
  'type': 'categorical',
108
  'stops': [
109
  ['OA', green],
 
113
  ]
114
  },
115
  "Purpose": {
116
+ 'property': 'purpose_type',
117
  'type': 'categorical',
118
  'stops': [
119
  ['FOR', green],
 
132
  style_choice_columns = {'Manager Type': style_options['Manager Type']['property'],
133
  'Access' : style_options['Access']['property'],
134
  'Purpose': style_options['Purpose']['property'],
135
+ 'Acquisition Cost': 'amount',
136
+ 'Measure Cost': 'conservation_funds_approved',
137
  }
138
 
 
 
139
  from langchain_openai import ChatOpenAI
140
  import streamlit as st
141