CoursePython

from osgeo import ogr

# Define the WKT string
wktstring = "POINT (1120351.5712494177 741921.4223245403)"

# Transform to a GDAL (OGR) object
point = ogr.CreateGeometryFromWkt(wktstring)

# Get properties
print(type(point))
print("%d,%d" % (point.GetX(), point.GetY()))

<class 'osgeo.ogr.Geometry'>
1120351,741921

from shapely.geometry import Point
from shapely.wkt import loads

# Create point from WKT string 
wktstring = 'POINT(173994.1578792833 444133.6032947102)'
wageningen_campus = loads(wktstring)
print(type(wageningen_campus))

# Point directly
wageningen_campus = Point([173994.1578792833, 444133.60329471016])
print(type(wageningen_campus))

<class 'shapely.geometry.point.Point'>
<class 'shapely.geometry.point.Point'>

import geopandas as gpd
from shapely.wkt import loads

# Define a point
wktstring = 'POINT(173994.1578792833 444133.6032947102)'

# Convert to a GeoSeries
gs = gpd.GeoSeries([loads(wktstring)])

# Inspect the properties
print(type(gs), len(gs))

# Specify the projection
gs.crs = "EPSG:28992" 

# We can now apply a function
# As an example, we add a buffer of 100 m
gs_buffer = gs.buffer(100)

# Inspect the results
print(gs.geometry)
print(gs_buffer.geometry)

<class 'geopandas.geoseries.GeoSeries'> 1
0    POINT (173994.158 444133.603)
dtype: geometry
0    POLYGON ((174094.158 444133.603, 174093.676 44...
dtype: geometry

import pandas as pd

# Create some data, with three points, a, b, and c.
data = {'name': ['a', 'b', 'c'],
        'x': [173994.1578792833, 173974.1578792833, 173910.1578792833],
        'y': [444135.6032947102, 444186.6032947102, 444111.6032947102]}

# Turn the data into a Pandas DataFrame (column names are extracted automatically)
df = pd.DataFrame(data)

# Inspect the DataFrame
print(df.head)

# Use the coordinates to make shapely Point geometries
geometry = [Point(xy) for xy in zip(df['x'], df['y'])]

# Pandas DataFrame and shapely Points can together become a GeoPandas GeoDataFrame
# Note that we specify the CRS (projection) directly while creating a GDF
wageningenGDF = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:28992") 

# Inspect wageningenGDF
print(type(wageningenGDF), len(wageningenGDF))

<bound method NDFrame.head of   name              x              y
0    a  173994.157879  444135.603295
1    b  173974.157879  444186.603295
2    c  173910.157879  444111.603295>
<class 'geopandas.geodataframe.GeoDataFrame'> 3

from matplotlib import pyplot as plt

# Plotting a map of the GeoDataFrame directly
wageningenGDF.plot(marker='*', color='green', markersize=50)

<Axes: >

# Check the current crs
print(wageningenGDF.crs)

# Re-project the points to WGS84
wageningenGDF = wageningenGDF.to_crs('EPSG:4326')

# Check the crs again to see if the changes were succesful
print(wageningenGDF.crs)

EPSG:28992
EPSG:4326

# Save to disk
wageningenGDF.to_file(filename='data/wageningenPOI.geojson', driver='GeoJSON')
wageningenGDF.to_file(filename='data/wageningenPOI.shp', driver='ESRI Shapefile')

# Read from disk
jsonGDF = gpd.read_file('data/wageningenPOI.geojson')
shpGDF = gpd.read_file('data/wageningenPOI.shp')

from owslib.wfs import WebFeatureService

# Put the WFS url in a variable
wfsUrl = 'https://geo.rijkswaterstaat.nl/services/ogc/gdr/nwb_wegen/ows?service=WFS&request=getcapabilities&version=2.0.0 '

# Create a WFS object
wfs = WebFeatureService(url=wfsUrl, version='2.0.0')

# Get the title from the object
print(wfs.identification.title)

# Check the contents of the WFS
print(list(wfs.contents))

GeoServer Web Feature Service
['nwb_wegen:hectopunten', 'nwb_wegen:mutaties_hectopunten', 'nwb_wegen:mutaties_wegvakken', 'nwb_wegen:nwb_light', 'nwb_wegen:wegvakken']

# Define center point and create bbox for study area
x, y = (173994.1578792833, 444133.60329471016)
xmin, xmax, ymin, ymax = x - 1000, x + 350, y - 1000, y + 350

# Get the features for the study area (using the wfs from the previous code block)
response = wfs.getfeature(typename=list(wfs.contents)[-1], bbox=(xmin, ymin, xmax, ymax))

# Save them to disk
with open('data/Roads.gml', 'wb') as file:
    file.write(response.read())

# Read in again with GeoPandas
roadsGDF = gpd.read_file('data/Roads.gml')

# Inspect and plot to get a quick view
print(type(roadsGDF))
roadsGDF.plot()
plt.show()

<class 'geopandas.geodataframe.GeoDataFrame'>

import json

# Get the WFS of the BAG
wfsUrl = 'https://service.pdok.nl/lv/bag/wfs/v2_0'
wfs = WebFeatureService(url=wfsUrl, version='2.0.0')
layer = list(wfs.contents)[0]

# Define center point and create bbox for study area
x, y = (173994.1578792833, 444133.60329471016)
xmin, xmax, ymin, ymax = x - 500, x + 500, y - 500, y + 500

# Get the features for the study area
# notice that we now get them as json, in contrast to before
response = wfs.getfeature(typename=layer, bbox=(xmin, ymin, xmax, ymax), outputFormat='json')
data = json.loads(response.read())

# Create GeoDataFrame, without saving first
buildingsGDF = gpd.GeoDataFrame.from_features(data['features'])

# Set crs to RD New
buildingsGDF.crs = 28992

# Plot roads and buildings together
roadlayer = roadsGDF.plot(color='grey')
buildingsGDF.plot(ax=roadlayer, color='red')

# Set the limits of the x and y axis
roadlayer.set_xlim(xmin, xmax)
roadlayer.set_ylim(ymin, ymax)

# Save the figure to disk
plt.savefig('./output/BuildingsAndRoads.png')

# Pandas function that returns the column labels of the DataFrame
print(buildingsGDF.columns)

# Pandas function that returns the first n rows, default n = 5
print(buildingsGDF.head())

# shape area (in the units of the projection)
print(buildingsGDF.area)

Index(['geometry', 'identificatie', 'rdf_seealso', 'bouwjaar', 'status', 'gebruiksdoel', 'oppervlakte_min', 'oppervlakte_max', 'aantal_verblijfsobjecten'], dtype='object') geometry identificatie \ 0 POLYGON ((174109.846 443692.447, 174111.755 44... 0289100000002314 1 POLYGON ((174160.754 444381.572, 174169.468 44... 0289100000002334 2 POLYGON ((173839.227 444032.995, 173838.937 44... 0289100000001853 3 POLYGON ((173668.781 443782.141, 173664.062 44... 0289100000001425 4 POLYGON ((173723.809 444187.129, 173726.313 44... 0289100000001461

                                     rdf_seealso  bouwjaar  \

0 http://bag.basisregistraties.overheid.nl/bag/i… 1998
1 http://bag.basisregistraties.overheid.nl/bag/i… 1997
2 http://bag.basisregistraties.overheid.nl/bag/i… 1960
3 http://bag.basisregistraties.overheid.nl/bag/i… 1970
4 http://bag.basisregistraties.overheid.nl/bag/i… 1994

        status                         gebruiksdoel  oppervlakte_min  \

0 Pand in gebruik sportfunctie 3389.0
1 Pand in gebruik bijeenkomstfunctie,onderwijsfunctie 11421.0
2 Pand in gebruik bijeenkomstfunctie,kantoorfunctie 3420.0
3 Pand in gebruik NaN
4 Pand in gebruik industriefunctie,kantoorfunctie 11714.0

oppervlakte_max aantal_verblijfsobjecten
0 3389.0 1
1 11421.0 1
2 3420.0 1
3 NaN 0
4 11714.0 1
0 2306.377288 1 8495.548236 2 1894.765417 3 4934.115309 4 4235.494168 …
111 55.514171 112 586.294838 113 153.238880 114 23.620050 115 32.915346 Length: 116, dtype: float64

# Inspect building year column
print(buildingsGDF['bouwjaar'])

0      1998
1      1997
2      1960
3      1970
4      1994
       ... 
111    2015
112    2005
113    2019
114    2018
115    2020
Name: bouwjaar, Length: 116, dtype: int64

# Inspect first
print(buildingsGDF.area > 1000)

# Make the selection, select all rows with area > 1000 m2, and all columns
# Using 'label based' indexing with loc, here with a Boolean array
largeBuildingsGDF = buildingsGDF.loc[buildingsGDF.area > 1000, :]

# Plot
largeBuildingsGDF.plot()

0       True
1       True
2       True
3       True
4       True
       ...  
111    False
112    False
113    False
114    False
115    False
Length: 116, dtype: bool

<Axes: >

# Inspect first
print( buildingsGDF['status'] != 'Pand in gebruik' )

# Make the selection, the list of required values can contain more than one item
newBuildingsGDF = buildingsGDF[buildingsGDF['status'] != 'Pand in gebruik']

# Plot the new buildings with a basemap for reference
# based on https://geopandas.org/gallery/plotting_basemap_background.html
import contextily as ctx

# Re-project
newBuildingsGDF = newBuildingsGDF.to_crs(epsg=3857)

# Plot with 50% transparency
ax = newBuildingsGDF.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, zoom=17)
ax.set_axis_off()

0      False
1      False
2      False
3      False
4      False
       ...  
111    False
112    False
113    False
114    False
115    False
Name: status, Length: 116, dtype: bool

print(type(roadsGDF))
print(roadsGDF.geometry)
print(type(roadsGDF.geometry))
print(roadsGDF['geometry'])

<class 'geopandas.geodataframe.GeoDataFrame'>
0      LINESTRING (173013.000 443861.000, 173017.000 ...
1      LINESTRING (174191.225 444043.279, 174073.464 ...
2      LINESTRING (173053.622 443720.683, 173082.215 ...
3      LINESTRING (173733.297 443864.215, 173727.082 ...
4      LINESTRING (173097.703 443846.478, 173100.261 ...
                             ...                        
466    LINESTRING (173449.802 443379.090, 173453.092 ...
467    LINESTRING (173346.000 443278.000, 173418.547 ...
468    LINESTRING (173345.000 443283.000, 173346.000 ...
469    LINESTRING (173048.920 443065.842, 173019.000 ...
470    LINESTRING (172971.735 443255.907, 172975.242 ...
Name: geometry, Length: 471, dtype: geometry
<class 'geopandas.geoseries.GeoSeries'>
0      LINESTRING (173013.000 443861.000, 173017.000 ...
1      LINESTRING (174191.225 444043.279, 174073.464 ...
2      LINESTRING (173053.622 443720.683, 173082.215 ...
3      LINESTRING (173733.297 443864.215, 173727.082 ...
4      LINESTRING (173097.703 443846.478, 173100.261 ...
                             ...                        
466    LINESTRING (173449.802 443379.090, 173453.092 ...
467    LINESTRING (173346.000 443278.000, 173418.547 ...
468    LINESTRING (173345.000 443283.000, 173346.000 ...
469    LINESTRING (173048.920 443065.842, 173019.000 ...
470    LINESTRING (172971.735 443255.907, 172975.242 ...
Name: geometry, Length: 471, dtype: geometry

# Buffer of 1.5 m on both sides
roadsPolygonGDF = gpd.GeoDataFrame(roadsGDF, geometry=roadsGDF.buffer(distance=1.5)) 

# Plot
roadsPolygonGDF.plot(color='blue', edgecolor='blue')

# Check the total coverage of buffers
print(roadsPolygonGDF.area.sum())

126694.30812794522

# Apply unary_all()
# This returns a geometry, which we convert to a GeoSeries to be able to apply GeoPandas methods again
roadsUnionGS = gpd.GeoSeries(roadsPolygonGDF.overlay())
print(roadsUnionGS)

# Check the new total coverage of buffers and compute the overlap
print(roadsUnionGS.area)
print('There was an overlap of ' + round((roadsPolygonGDF.area.sum() - roadsUnionGS.area[0]), 1).astype(str) + ' square meters.')

# Specify the coordinate system for roads
roadsPolygonGDF.crs = 28992

# Re-project new buildings dataset
newBuildingsGDF = newBuildingsGDF.to_crs(epsg=28992)

# Buffer, returns geometry, convert to GeoSeries
areaOfInterestGS = gpd.GeoSeries(newBuildingsGDF.buffer(distance=100).union_all())

# Convex hull, returns a GeoSeries of geometries, convert to GeoDataFrame
areaOfInterestGDF = gpd.GeoDataFrame(areaOfInterestGS.convex_hull)

# Adapt metadata
areaOfInterestGDF = areaOfInterestGDF.rename(columns={0:'geometry'}).set_geometry('geometry')
areaOfInterestGDF.crs = 'EPSG:28992'

# Perform an intersection overlay
roadsIntersectionGDF = gpd.overlay(areaOfInterestGDF, roadsPolygonGDF, how="intersection")

# Plot the results
roadlayer = roadsIntersectionGDF.plot(color='grey', edgecolor='grey')
newBuildingsGDF.plot(ax=roadlayer, color='red')

# Put the WFS url in a variable again
wfsUrl = 'https://geo.rijkswaterstaat.nl/services/ogc/gdr/nwb_wegen/ows?service=WFS&request=getcapabilities&version=2.0.0'

# Create a WFS object
wfs = WebFeatureService(url=wfsUrl, version='2.0.0')

# Let's create a bit bigger bounding box for this example than last time
x, y = (173994.1578792833, 444133.60329471016)
xmin, xmax, ymin, ymax = x - 3000, x + 3000, y - 3000, y + 3000

# Get the features for the study area
response = wfs.getfeature(typename=list(wfs.contents)[-1], bbox=(xmin, ymin, xmax, ymax))
roadsGDF = gpd.read_file(response)

# Select the roads within Wageningen municipality
wageningenRoadsGDF = roadsGDF.loc[roadsGDF['gme_naam'] == 'Wageningen']

# Plot
wageningenRoadsGDF.plot(edgecolor='purple')

<Axes: >

"""
区别总结：

    iloc：基于整数位置索引，用于按行号选取数据（类似于数组的索引）。
    loc：基于标签名称索引，用于按行标签选取数据。

选择哪个：

    当你知道行号时，使用 iloc。
    当你知道行标签时，使用 loc。
"""

'\n区别总结：\n\n    iloc：基于整数位置索引，用于按行号选取数据（类似于数组的索引）。\n    loc：基于标签名称索引，用于按行标签选取数据。\n\n选择哪个：\n\n    当你知道行号时，使用 iloc。\n    当你知道行标签时，使用 loc。\n'

import osmnx as ox

# Using a geocoder to get the extent
city = ox.geocoder.geocode_to_gdf('Wageningen, Netherlands')
ox.plot.plot_footprints(ox.project_gdf(city), color='lightblue', bgcolor='#FFFFFF', 
                        alpha=0.8, edge_color='grey', edge_linewidth=2)
# Get bike network and create graph
wageningenRoadsGraph = ox.graph.graph_from_place('Wageningen, Netherlands', network_type='bike')

# Plot and save
ox.plot.plot_graph(wageningenRoadsGraph, figsize=(10, 10), node_size=2)
ox.io.save_graph_shapefile(G=wageningenRoadsGraph, filepath='data/OSMnetwork_Wageningen.shp')

# Metadata
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G=wageningenRoadsGraph)
print(gdf_nodes.info())
print(gdf_edges.info())

C:\Users\Xiaolu Yan\AppData\Local\Temp\ipykernel_4940\2228636502.py:12: FutureWarning: The `save_graph_shapefile` function is deprecated and will be removed in the v2.0.0 release. Instead, use the `save_graph_geopackage` function to save graphs as GeoPackage files for subsequent GIS analysis. See the OSMnx v2 migration guide: https://github.com/gboeing/osmnx/issues/1123
  ox.io.save_graph_shapefile(G=wageningenRoadsGraph, filepath='data/OSMnetwork_Wageningen.shp')
C:\Users\Xiaolu Yan\AppData\Roaming\Python\Python312\site-packages\osmnx\io.py:115: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.
  gdf_nodes.to_file(filepath_nodes, driver="ESRI Shapefile", index=True, encoding=encoding)
WARNING:fiona._env:Normalized/laundered field name: 'street_count' to 'street_cou'

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 3701 entries, 44544493 to 12233227758
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   y             3701 non-null   float64 
 1   x             3701 non-null   float64 
 2   street_count  3701 non-null   int64   
 3   highway       80 non-null     object  
 4   geometry      3701 non-null   geometry
dtypes: float64(2), geometry(1), int64(1), object(1)
memory usage: 173.5+ KB
None
<class 'geopandas.geodataframe.GeoDataFrame'>
MultiIndex: 9394 entries, (44544493, 44546732, 0) to (12233227758, 12233205467, 0)
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   osmid      9394 non-null   object  
 1   name       5778 non-null   object  
 2   highway    9394 non-null   object  
 3   maxspeed   4932 non-null   object  
 4   oneway     9394 non-null   bool    
 5   reversed   9394 non-null   object  
 6   length     9394 non-null   float64 
 7   geometry   9394 non-null   geometry
 8   lanes      886 non-null    object  
 9   service    1418 non-null   object  
 10  access     193 non-null    object  
 11  ref        174 non-null    object  
 12  width      164 non-null    object  
 13  bridge     98 non-null     object  
 14  junction   54 non-null     object  
 15  tunnel     22 non-null     object  
 16  est_width  2 non-null      object  
dtypes: bool(1), float64(1), geometry(1), object(14)
memory usage: 1.5+ MB
None

# Origin
source = ox.distance.nearest_nodes(wageningenRoadsGraph, 5.665779, 51.987817)
print(source)

# Destination
target = ox.distance.nearest_nodes(wageningenRoadsGraph, 5.662409, 51.964870)
print(target)

# Compute shortest path 
shortestroute = ox.routing.shortest_path(G=wageningenRoadsGraph, orig=source, 
                                          dest=target, weight='length')
print(shortestroute)

# Plot
fig, ax = ox.plot.plot_graph_route(wageningenRoadsGraph, shortestroute, figsize=(20, 20), 
                         route_alpha=0.6, route_color='darkred',  bgcolor='white', 
                         node_color='darkgrey', edge_color='darkgrey',
                         route_linewidth=10, orig_dest_size=100)

fig.show()

7705210574
44630024
[7705210574, 1279165600, 7590239380, 1018846498, 702035974, 702035970, 702035971, 560099226, 3792795073, 702035958, 560549922, 10738782104, 10738782085, 7396278953, 560097296, 3435384072, 44722922, 565610983, 44715570, 9471874865, 1018849738, 8758602117, 598991587, 1018847573, 3445246222, 598991601, 984417579, 984417575, 6750793004, 44702961, 44700366, 7564240767, 598991563, 7564240760, 44696494, 44693918, 44691685, 7964521008, 599003427, 598726079, 599003415, 599003417, 599003418, 10207038656, 10593104438, 1604717027, 5068209225, 1604717042, 1898834025, 1898834002, 1898833973, 10041363062, 1898833952, 1898834050, 1898833992, 6437100580, 1898834053, 44645746, 44642317, 44640411, 44639358, 44637367, 44636223, 44634740, 44632960, 44630979, 8577019207, 44630024]

C:\Users\Xiaolu Yan\AppData\Local\Temp\ipykernel_4940\2962990885.py:20: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
  fig.show()

import folium

# Initialize the map
campusMap = folium.Map([51.98527485, 5.66370505205543], zoom_start=17)

# Re-project
buildingsGDF = buildingsGDF.to_crs(4326)

# Remove Timestamp objects
roadsPolygonGDF = roadsPolygonGDF.drop(columns=['wvk_begdat'])
  # Folium does not support Timestamp objects, thus this column has to be dropped
roadsPolygonGDF = roadsPolygonGDF.to_crs(4326)

# Add the buildings
folium.Choropleth(buildingsGDF, name='Building construction years',
                  data=buildingsGDF, columns=['identificatie', 'bouwjaar'],
                  key_on='feature.properties.identificatie', fill_color='RdYlGn',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Construction year').add_to(campusMap)

# Add the roads
folium.GeoJson(roadsPolygonGDF).add_to(campusMap)

# roadsPolygonGDF.explore()

# Add layer control
folium.LayerControl().add_to(campusMap)

# Save (you can now open the generated .html file from the output directory)
campusMap.save('./output/campusMap.html')

from owslib.wcs import WebCoverageService

# Access the WCS by proving the url and optional arguments
wcs = WebCoverageService('https://service.pdok.nl/rws/ahn/wcs/v1_0?SERVICE=WCS&request=GetCapabilities', version='1.0.0')

# Print to check the contents of the WCS
print(list(wcs.contents))

['dsm_05m', 'dtm_05m']

# Get all operations and print the name of each of them
print([op.name for op in wcs.operations])

['GetCapabilities', 'DescribeCoverage', 'GetCoverage']

import os

# Define a bounding box in the available crs (see before) by picking a point and drawing a 1x1 km box around it
x, y = 174100, 444100
bbox = (x - 500, y - 500, x + 500, y + 500)

# Request the DSM data from the WCS
response = wcs.getCoverage(identifier='dsm_05m', bbox=bbox, format='GEOTIFF',
                           crs='urn:ogc:def:crs:EPSG::28992', resx=0.5, resy=0.5)

# Write the data to a local file in the 'data' directory
with open('data/AHN3_05m_DSM.tif', 'wb') as file:
    file.write(response.read())

# Do the same for the DTM
response = wcs.getCoverage(identifier='dtm_05m', bbox=bbox, format='GEOTIFF',
                           crs='urn:ogc:def:crs:EPSG::28992', resx=0.5, resy=0.5)

with open('data/AHN3_05m_DTM.tif', 'wb') as file:
    file.write(response.read())

from osgeo import gdal

# Open dataset, gdal automatically selects the correct driver
ds = gdal.Open("data/AHN3_05m_DSM.tif" )

# Get the band (band number 1)
band = ds.GetRasterBand(1)

# Get the data array
data = band.ReadAsArray()
print(data)

# Delete objects to close the file
ds = None

[[9.6194000e+00 9.6737003e+00 9.7122002e+00 ... 1.5386500e+01
  1.5427000e+01 1.5508700e+01]
 [9.7265997e+00 9.6819000e+00 9.6217003e+00 ... 1.5614400e+01
  1.5687100e+01 1.5700100e+01]
 [9.6140003e+00 9.5950003e+00 9.5971003e+00 ... 1.5810200e+01
  1.5840300e+01 1.5918300e+01]
 ...
 [8.1936998e+00 8.1719999e+00 8.1789999e+00 ... 3.4028235e+38
  3.4028235e+38 3.4028235e+38]
 [8.1758003e+00 8.1443005e+00 8.1541996e+00 ... 3.4028235e+38
  3.4028235e+38 3.4028235e+38]
 [8.1570997e+00 8.1427002e+00 8.1483002e+00 ... 3.4028235e+38
  3.4028235e+38 3.4028235e+38]]

C:\Users\Xiaolu Yan\AppData\Roaming\Python\Python312\site-packages\osgeo\gdal.py:312: FutureWarning: Neither gdal.UseExceptions() nor gdal.DontUseExceptions() has been explicitly called. In GDAL 4.0, exceptions will be enabled by default.
  warnings.warn(

import rasterio
from rasterio.plot import show
import matplotlib.pyplot as plt

# Open the two rasters 
dsm = rasterio.open("data/AHN3_05m_DSM.tif", driver="GTiff")
dtm = rasterio.open("data/AHN3_05m_DTM.tif", driver="GTiff")

# Metadata functions from Rasterio
print(dsm.meta)
print(dtm.meta)

# Plot with rasterio.plot, which provides Matplotlib functionality
plt.figure(figsize=(5, 5), dpi=300)  # adjust size and resolution
show(dsm, title='Digital Surface Model', cmap='gist_ncar')

WARNING:rasterio._env:CPLE_AppDefined in PROJ: proj_create_from_database: C:\Users\Xiaolu Yan\AppData\Roaming\Python\Python312\site-packages\osgeo\data\proj\proj.db contains DATABASE.LAYOUT.VERSION.MINOR = 2 whereas a number >= 3 is expected. It comes from another PROJ installation.
WARNING:rasterio._env:CPLE_AppDefined in The definition of projected CRS EPSG:28992 got from GeoTIFF keys is not the same as the one from the EPSG registry, which may cause issues during reprojection operations. Set GTIFF_SRS_SOURCE configuration option to EPSG to use official parameters (overriding the ones from GeoTIFF keys), or to GEOKEYS to use custom values from GeoTIFF keys and drop the EPSG code.
WARNING:rasterio._env:CPLE_AppDefined in PROJ: proj_create_from_database: C:\Users\Xiaolu Yan\AppData\Roaming\Python\Python312\site-packages\osgeo\data\proj\proj.db contains DATABASE.LAYOUT.VERSION.MINOR = 2 whereas a number >= 3 is expected. It comes from another PROJ installation.
WARNING:rasterio._env:CPLE_AppDefined in The definition of projected CRS EPSG:28992 got from GeoTIFF keys is not the same as the one from the EPSG registry, which may cause issues during reprojection operations. Set GTIFF_SRS_SOURCE configuration option to EPSG to use official parameters (overriding the ones from GeoTIFF keys), or to GEOKEYS to use custom values from GeoTIFF keys and drop the EPSG code.

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': 3.4028234663852886e+38, 'width': 2000, 'height': 2000, 'count': 1, 'crs': CRS.from_wkt('LOCAL_CS["Amersfoort / RD New",UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'), 'transform': Affine(0.5, 0.0, 173600.0,
       0.0, -0.5, 444600.0)}
{'driver': 'GTiff', 'dtype': 'float32', 'nodata': 3.4028234663852886e+38, 'width': 2000, 'height': 2000, 'count': 1, 'crs': CRS.from_wkt('LOCAL_CS["Amersfoort / RD New",UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'), 'transform': Affine(0.5, 0.0, 173600.0,
       0.0, -0.5, 444600.0)}

<Axes: title={'center': 'Digital Surface Model'}>

# Rasterio object
print(type(dsm))

# Read, show object type and data
dsm_data = dsm.read(1)
print(type(dsm_data))
print(dsm_data)

<class 'rasterio.io.DatasetReader'>
<class 'numpy.ndarray'>
[[9.6194000e+00 9.6737003e+00 9.7122002e+00 ... 1.5386500e+01
  1.5427000e+01 1.5508700e+01]
 [9.7265997e+00 9.6819000e+00 9.6217003e+00 ... 1.5614400e+01
  1.5687100e+01 1.5700100e+01]
 [9.6140003e+00 9.5950003e+00 9.5971003e+00 ... 1.5810200e+01
  1.5840300e+01 1.5918300e+01]
 ...
 [8.1936998e+00 8.1719999e+00 8.1789999e+00 ... 3.4028235e+38
  3.4028235e+38 3.4028235e+38]
 [8.1758003e+00 8.1443005e+00 8.1541996e+00 ... 3.4028235e+38
  3.4028235e+38 3.4028235e+38]
 [8.1570997e+00 8.1427002e+00 8.1483002e+00 ... 3.4028235e+38
  3.4028235e+38 3.4028235e+38]]

import numpy as np

# Access the data from the two rasters
dsm_data = dsm.read()
dtm_data = dtm.read()

# Set our nodata to np.nan (this is important for later)
dsm_data[dsm_data == dsm.nodata] = np.nan
dtm_data[dtm_data == dtm.nodata] = np.nan

bands = dtm.count
rows, cols = dtm.height, dtm.width

print(bands)
print(rows, cols)

1
2000 2000

from rasterio.fill import fillnodata

# Create a mask to specify which pixels to fill (0=fill, 1=do not fill)
dtm_mask = dtm_data.copy()
dtm_mask[~np.isnan(dtm_data)] = 1
dtm_mask[np.isnan(dtm_data)] = 0

# Fill missing values
dtm_data = fillnodata(dtm_data, mask=dtm_mask)
"""
输入：原始的 dtm_data 包含了一些缺失值（表示为 NaN），而 dtm_mask 是一个掩膜数组，指示了哪些像元需要填充（0 表示需要填充，1 表示不需要填充）。

过程：fillnodata() 函数会使用邻近像元的有效值，按照某种规则（例如线性插值或反距离加权插值）填补 NaN 位置的值。

输出：填充后的 dtm_data 数组，其中之前的 NaN 值已经被合理地填充。
"""

'\n输入：原始的 dtm_data 包含了一些缺失值（表示为 NaN），而 dtm_mask 是一个掩膜数组，指示了哪些像元需要填充（0 表示需要填充，1 表示不需要填充）。\n\n过程：fillnodata() 函数会使用邻近像元的有效值，按照某种规则（例如线性插值或反距离加权插值）填补 NaN 位置的值。\n\n输出：填充后的 dtm_data 数组，其中之前的 NaN 值已经被合理地填充。\n'

# Subtract the NumPy arrays 
chm = dsm_data - dtm_data

# Check the resulting array
print(chm)

# Copy metadata of one of the rasters (does not matter which one)
kwargs = dsm.meta 

# Save the chm as a raster
with rasterio.open('data/AHN3_05m_CHM.tif', 'w', **kwargs) as file:
    file.write(chm.astype(rasterio.float32))

[[[0.         0.0187006  0.00889969 ... 3.199545   3.244937   3.3275146 ]
  [0.02470016 0.02740002 0.         ... 3.435215   3.5135546  3.5329914 ]
  [0.         0.         0.         ... 3.6233873  3.6674585  3.7521896 ]
  ...
  [0.         0.         0.         ...        nan        nan        nan]
  [0.         0.         0.         ...        nan        nan        nan]
  [0.         0.         0.         ...        nan        nan        nan]]]

import geopandas as gpd
import json
from owslib.wfs import WebFeatureService

# Get the WFS of the BAG
wfsUrl = 'https://service.pdok.nl/lv/bag/wfs/v2_0'
wfs = WebFeatureService(url=wfsUrl, version='2.0.0')
layer = list(wfs.contents)[0]

# Get the features for the study area
# notice that we now get them as json, in contrast to before
response = wfs.getfeature(typename=layer, bbox=bbox, outputFormat='json')
data = json.loads(response.read())

# Create GeoDataFrame, without saving first
buildings_gdf = gpd.GeoDataFrame.from_features(data['features'])

# Set crs to RD New
buildings_gdf.crs = 28992

import rasterstats as rs

# Apply the zonal statistics function with gdf and tif as input
chm_buildings = rs.zonal_stats(buildings_gdf, "data/AHN3_05m_CHM.tif", prefix='CHM_', geojson_out=True)

# Convert GeoJSON to GeoDataFrame
buildings_gdf = gpd.GeoDataFrame.from_features(chm_buildings)

# Check the added attributes with a prefix 'CHM_'
print(buildings_gdf['CHM_mean'])

0       6.146636
1      11.102594
2       7.686321
3       9.193958
4      13.348260
         ...    
168     6.932054
169     3.089736
170     5.571303
171     4.030652
172     2.582063
Name: CHM_mean, Length: 173, dtype: float64

# Create one plot with figure size 10 by 10
fig, ax = plt.subplots(1, figsize=(10, 10))

# Customize figure with title, legend, and facecolour
ax.set_title('Heights above ground (m) of buildings on the WUR campus')
buildings_gdf.plot(ax=ax, column='CHM_mean', k=6,
                   cmap=plt.cm.viridis, linewidth=1, edgecolor='black', legend=True)
ax.set_facecolor("lightgray")

# Make sure to get an equal scale in the x and y direction
plt.axis('equal')

# Visualize figure
plt.show()

# Create one plot with figure size 10 by 10
fig, ax = plt.subplots(figsize=(10, 10), dpi=200)

# imshow() is the main raster plotting method in Matplotlib
# Again, ensure an equal scale in the x and y direction
dsmplot = ax.imshow(dsm_data[0], cmap='Oranges', extent=bbox, aspect='equal')

# Title (do not do this for a scientific report, use a caption instead)
ax.set_title("Digital Surface Model - WUR Campus", fontsize=14)

# Add a legend (colourbar) with label
cbar = fig.colorbar(dsmplot, fraction=0.035, pad=0.01)
cbar.ax.get_yaxis().labelpad = 15
cbar.ax.set_ylabel('Height (m)', rotation=90)

# Hide the axes
ax.set_axis_off()
plt.show()

# Figure with three subplots, unpack directly
fig, (axdsm, axdtm, axchm) = plt.subplots(1, 3, figsize=(15, 7), dpi=200)

# Populate the three subplots with raster data
show(dsm_data, ax=axdsm, title='DSM')
show(dtm_data, ax=axdtm, title='Filled DTM')
show(chm, ax=axchm, title='CHM')
plt.show()

from rasterio.plot import show_hist

# Figure with three subplots, unpack directly
fig, (axdsm, axdtm, axchm) = plt.subplots(1, 3, figsize=(15, 7), dpi=200)

# Populate the three subplots with histograms
show_hist(dsm_data, ax=axdsm, bins=100, lw=0.0, stacked=False, alpha=0.3, title="Histogram DSM")
show_hist(dtm_data, ax=axdtm, bins=100, lw=0.0, stacked=False, alpha=0.3, title="Histogram filled DTM")
show_hist(chm, ax=axchm, bins=100, lw=0.0, stacked=False, alpha=0.3, title="Histogram CHM")

# Build legends and show
axdsm.legend(['DSM'])
axdtm.legend(['Filled DTM'])
axchm.legend(['CHM'])
plt.show()