From “Vector layers (`geopandas`)”#

Last updated: 2023-02-25 13:36:34

Exercise 08-a#

import geopandas as gpd

# Read and subset
stat = gpd.read_file("data/statisticalareas_demography2019.gdb")
stat = stat[["YISHUV_STAT11", "SHEM_YISHUV", "SHEM_YISHUV_ENGLISH", "Pop_Total", "geometry"]]

# Examine CRS
stat.crs

<Derived Projected CRS: EPSG:2039>
Name: Israel 1993 / Israeli TM Grid
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Israel - onshore; Palestine Territory - onshore.
- bounds: (34.17, 29.45, 35.69, 33.28)
Coordinate Operation:
- name: Israeli TM
- method: Transverse Mercator
Datum: Israel 1993
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

# Feature count
stat.shape[0]

# Are there any duplicates in the 'YISHUV_STAT11' column?
stat["YISHUV_STAT11"].duplicated().any()

False

# Plot
stat.plot(edgecolor="black", color="none");

_images/exercise_solutions_geopandas1_11_0.png

# Subset 'Beer-Sheva'
sel = stat["SHEM_YISHUV"] == "באר שבע"
stat1 = stat[sel]

# Plot
stat1.plot(column="Pop_Total", edgecolor="black", cmap="Reds", legend=True);

_images/exercise_solutions_geopandas1_14_0.png

# Feature count
stat1.shape[0]

# Sum of population
stat1["Pop_Total"].sum()

209685.0

Exercise 08-b#

import pandas as pd
import geopandas as gpd
import shapely.geometry

# Read
agency = pd.read_csv("data/gtfs/agency.txt")
routes = pd.read_csv("data/gtfs/routes.txt")
trips = pd.read_csv("data/gtfs/trips.txt")
shapes = pd.read_csv("data/gtfs/shapes.txt")

# Subset
agency = agency[["agency_id", "agency_name"]]
routes = routes[["route_id", "agency_id", "route_short_name", "route_long_name"]]
trips = trips[["trip_id", "route_id", "shape_id"]]
shapes = shapes[["shape_id", "shape_pt_sequence", "shape_pt_lon", "shape_pt_lat"]]

# Subset agency
agency = agency[agency["agency_name"] == "דן באר שבע"]
routes = routes[routes["agency_id"] == agency["agency_id"].iloc[0]]
routes = routes.drop("agency_id", axis=1)

# Drop duplicate trips
trips = trips.drop_duplicates(subset="route_id")
trips = trips.drop("trip_id", axis=1)

# Join routes with shapes
routes = pd.merge(routes, trips, on="route_id", how="left")
routes = pd.merge(routes, shapes, on="shape_id", how="left")

# To points
geom = gpd.points_from_xy(routes["shape_pt_lon"], routes["shape_pt_lat"], crs=4326)
routes = gpd.GeoDataFrame(data=routes, geometry=geom)
routes = routes.drop(["shape_pt_lon", "shape_pt_lat"], axis=1)

# To lines
routes = routes.groupby("route_id").agg({
    "route_short_name": "first",
    "route_long_name": "first",
    "geometry": lambda x: shapely.geometry.LineString(x.to_list()),
}).reset_index()
routes = gpd.GeoDataFrame(routes, crs=4326)
routes

	route_id	route_short_name	route_long_name	geometry
0	142	2א	שד. הצנחנים / צבי בן יעקב-באר ש...	LINESTRING (34.77787 31.22497, ...
1	143	2א	מסוף רמות-באר שבע<->שד. הצנחנים...	LINESTRING (34.82207 31.28003, ...
2	178	14א	ת. רכבת ב''ש צפון-באר שבע<->שד....	LINESTRING (34.80885 31.26133, ...
3	180	14א	שד. הצנחנים / צבי בן יעקב-באר ש...	LINESTRING (34.77787 31.22497, ...
4	185	16א	כפר האירוסים/דרך אילן רמון-באר ...	LINESTRING (34.75704 31.25477, ...
...	...	...	...	...
66	27981	27	ת.מרכזית/עירוניים לדרום-באר שבע...	LINESTRING (34.79791 31.24329, ...
67	27982	27א	הברזל/החורש-באר שבע<->מכללה טכנ...	LINESTRING (34.80872 31.21809, ...
68	27983	27	מכללה טכנולוגית חיל האוויר/דרך ...	LINESTRING (34.75665 31.25459, ...
69	27984	27א	מכללה טכנולוגית חיל האוויר/דרך ...	LINESTRING (34.75665 31.25459, ...
70	28740	18א	ת.מרכזית/עירוניים לדרום-באר שבע...	LINESTRING (34.79791 31.24329, ...

71 rows × 4 columns

# Calculate start points
pnt = [shapely.geometry.Point(line.coords[0]) for line in routes["geometry"].to_list()]
pnt = gpd.GeoSeries(pnt, crs=4326)
pnt

   POINT (34.77787 31.22497)
   POINT (34.82207 31.28003)
   POINT (34.80885 31.26133)
   POINT (34.77787 31.22497)
   POINT (34.75704 31.25477)
                ...            
  POINT (34.79791 31.24329)
  POINT (34.80872 31.21809)
  POINT (34.75665 31.25459)
  POINT (34.75665 31.25459)
  POINT (34.79791 31.24329)
Length: 71, dtype: geometry

# Plot
base = routes.plot(zorder=1)
pnt.plot(ax=base, color="red", zorder=2);

_images/exercise_solutions_geopandas1_28_0.png

Exercise 08-c#

import geopandas as gpd

# Read
stat = gpd.read_file("data/statisticalareas_demography2019.gdb")

# Subset columns
sel = ["SHEM_YISHUV", "age_0_4", "age_5_9", "age_10_14", "age_15_19", "Pop_Total"]
stat = stat[sel]
stat

	SHEM_YISHUV	age_0_4	age_5_9	age_10_14	age_15_19	Pop_Total
0	המעפיל	72.0	89.0	109.0	73.0	869.0
1	משגב עם	34.0	15.0	19.0	26.0	339.0
2	גאולים	77.0	86.0	92.0	69.0	968.0
3	להבות הבשן	56.0	78.0	99.0	87.0	915.0
4	מכמורת	116.0	130.0	121.0	79.0	1403.0
...	...	...	...	...	...	...
3190	קריית נטפים	165.0	165.0	151.0	67.0	958.0
3191	דולב	215.0	190.0	174.0	143.0	1448.0
3192	עתניאל	115.0	108.0	118.0	158.0	1044.0
3193	יצהר	412.0	357.0	240.0	81.0	1726.0
3194	מבואות יריחו	NaN	NaN	NaN	NaN	NaN

3195 rows × 6 columns

# Replace "No Data" with zero
sel = ["age_0_4", "age_5_9", "age_10_14", "age_15_19", "Pop_Total"]
stat[sel] = stat[sel].fillna(0)
stat

	SHEM_YISHUV	age_0_4	age_5_9	age_10_14	age_15_19	Pop_Total
0	המעפיל	72.0	89.0	109.0	73.0	869.0
1	משגב עם	34.0	15.0	19.0	26.0	339.0
2	גאולים	77.0	86.0	92.0	69.0	968.0
3	להבות הבשן	56.0	78.0	99.0	87.0	915.0
4	מכמורת	116.0	130.0	121.0	79.0	1403.0
...	...	...	...	...	...	...
3190	קריית נטפים	165.0	165.0	151.0	67.0	958.0
3191	דולב	215.0	190.0	174.0	143.0	1448.0
3192	עתניאל	115.0	108.0	118.0	158.0	1044.0
3193	יצהר	412.0	357.0	240.0	81.0	1726.0
3194	מבואות יריחו	0.0	0.0	0.0	0.0	0.0

3195 rows × 6 columns

# Subset towns with total population >5000
sel = stat["Pop_Total"] > 5000
stat = stat[sel].copy()
stat

	SHEM_YISHUV	age_0_4	age_5_9	age_10_14	age_15_19	Pop_Total
28	ירושלים	2579.0	2594.0	2431.0	2411.0	23581.0
29	ירושלים	3161.0	2997.0	2815.0	2786.0	22258.0
30	ירושלים	1714.0	1585.0	1373.0	1364.0	11336.0
31	ירושלים	2469.0	2436.0	2369.0	2378.0	19591.0
32	ירושלים	3393.0	3293.0	3172.0	3267.0	29503.0
...	...	...	...	...	...	...
3133	רעננה	797.0	668.0	527.0	411.0	6978.0
3136	רעננה	471.0	561.0	619.0	489.0	6680.0
3142	שפרעם	3506.0	3431.0	3637.0	4298.0	42137.0
3152	טמרה	3155.0	3206.0	3451.0	3700.0	34392.0
3172	באר שבע	437.0	400.0	384.0	393.0	5205.0

471 rows × 6 columns

# Calculate total population in ages 0-19
stat["Pop_Total_0_19"] = stat["age_0_4"] + stat["age_5_9"] + stat["age_10_14"] + stat["age_15_19"]
stat

	SHEM_YISHUV	age_0_4	age_5_9	age_10_14	age_15_19	Pop_Total	Pop_Total_0_19
28	ירושלים	2579.0	2594.0	2431.0	2411.0	23581.0	10015.0
29	ירושלים	3161.0	2997.0	2815.0	2786.0	22258.0	11759.0
30	ירושלים	1714.0	1585.0	1373.0	1364.0	11336.0	6036.0
31	ירושלים	2469.0	2436.0	2369.0	2378.0	19591.0	9652.0
32	ירושלים	3393.0	3293.0	3172.0	3267.0	29503.0	13125.0
...	...	...	...	...	...	...	...
3133	רעננה	797.0	668.0	527.0	411.0	6978.0	2403.0
3136	רעננה	471.0	561.0	619.0	489.0	6680.0	2140.0
3142	שפרעם	3506.0	3431.0	3637.0	4298.0	42137.0	14872.0
3152	טמרה	3155.0	3206.0	3451.0	3700.0	34392.0	13512.0
3172	באר שבע	437.0	400.0	384.0	393.0	5205.0	1614.0

471 rows × 7 columns

# Aggregate
stat = stat.groupby("SHEM_YISHUV").sum().reset_index()
stat

	SHEM_YISHUV	age_0_4	age_5_9	age_10_14	age_15_19	Pop_Total	Pop_Total_0_19
0	אבו גוש	784.0	720.0	740.0	717.0	7698.0	2961.0
1	אבו סנאן	1281.0	1148.0	1229.0	1428.0	14099.0	5086.0
2	אבן יהודה	926.0	1335.0	1482.0	1158.0	13826.0	4901.0
3	אום אל-פחם	5639.0	5587.0	5955.0	6367.0	56109.0	23548.0
4	אופקים	3112.0	2519.0	1901.0	1489.0	21793.0	9021.0
...	...	...	...	...	...	...	...
174	שערי תקווה	564.0	645.0	638.0	481.0	6039.0	2328.0
175	שפרעם	3506.0	3431.0	3637.0	4298.0	42137.0	14872.0
176	תל אביב -יפו	7217.0	6461.0	5473.0	4941.0	89404.0	24092.0
177	תל מונד	881.0	1202.0	1371.0	1346.0	13144.0	4800.0
178	תל שבע	3536.0	3083.0	2780.0	2770.0	20808.0	12169.0

179 rows × 7 columns

# Calculate ratio
stat["ratio"] = stat["Pop_Total_0_19"] / stat["Pop_Total"]
stat

	SHEM_YISHUV	age_0_4	age_5_9	age_10_14	age_15_19	Pop_Total	Pop_Total_0_19	ratio
0	אבו גוש	784.0	720.0	740.0	717.0	7698.0	2961.0	0.384645
1	אבו סנאן	1281.0	1148.0	1229.0	1428.0	14099.0	5086.0	0.360735
2	אבן יהודה	926.0	1335.0	1482.0	1158.0	13826.0	4901.0	0.354477
3	אום אל-פחם	5639.0	5587.0	5955.0	6367.0	56109.0	23548.0	0.419683
4	אופקים	3112.0	2519.0	1901.0	1489.0	21793.0	9021.0	0.413940
...	...	...	...	...	...	...	...	...
174	שערי תקווה	564.0	645.0	638.0	481.0	6039.0	2328.0	0.385494
175	שפרעם	3506.0	3431.0	3637.0	4298.0	42137.0	14872.0	0.352944
176	תל אביב -יפו	7217.0	6461.0	5473.0	4941.0	89404.0	24092.0	0.269473
177	תל מונד	881.0	1202.0	1371.0	1346.0	13144.0	4800.0	0.365186
178	תל שבע	3536.0	3083.0	2780.0	2770.0	20808.0	12169.0	0.584823

179 rows × 8 columns

# Classify and sort
stat["type"] = "old"
sel = stat["ratio"] >= stat["ratio"].mean()
stat.loc[sel, "type"] = "young"
stat.sort_values(by="ratio")

	SHEM_YISHUV	age_0_4	age_5_9	age_10_14	age_15_19	Pop_Total	Pop_Total_0_19	ratio	type
149	קריית ביאליק	696.0	646.0	634.0	635.0	10980.0	2611.0	0.237796	old
118	נשר	459.0	491.0	446.0	403.0	7123.0	1799.0	0.252562	old
32	בני עי"ש	518.0	460.0	389.0	424.0	6978.0	1791.0	0.256664	old
164	רמת גן	4106.0	3764.0	3511.0	3077.0	53747.0	14458.0	0.269001	old
176	תל אביב -יפו	7217.0	6461.0	5473.0	4941.0	89404.0	24092.0	0.269473	old
...	...	...	...	...	...	...	...	...	...
63	חורה	4087.0	3553.0	3001.0	2799.0	22337.0	13440.0	0.601692	young
29	בית שמש	18999.0	17515.0	14383.0	9976.0	100881.0	60873.0	0.603414	young
10	אלעד	7271.0	8064.0	8142.0	6457.0	48764.0	29934.0	0.613854	young
30	ביתר עילית	9963.0	8915.0	7437.0	4291.0	46889.0	30606.0	0.652733	young
105	מודיעין עילית	14246.0	13303.0	9782.0	5517.0	62919.0	42848.0	0.681003	young

179 rows × 9 columns

Exercise 08-d#

import pandas as pd
import geopandas as gpd
import shapely.geometry

# Read
agency = pd.read_csv("data/gtfs/agency.txt")
routes = pd.read_csv("data/gtfs/routes.txt")
trips = pd.read_csv("data/gtfs/trips.txt")
shapes = pd.read_csv("data/gtfs/shapes.txt")

# Subset
agency = agency[["agency_id", "agency_name"]]
routes = routes[["route_id", "agency_id", "route_short_name", "route_long_name"]]
trips = trips[["trip_id", "route_id", "shape_id"]]
shapes = shapes[["shape_id", "shape_pt_sequence", "shape_pt_lon", "shape_pt_lat"]]

# Subset agency
agency = agency[agency["agency_name"] == "מטרופולין"]
routes = routes[routes["agency_id"] == agency["agency_id"].iloc[0]]
routes = routes.drop("agency_id", axis=1)

# Drop duplicate trips
trips = trips.drop_duplicates(subset="route_id")
trips = trips.drop("trip_id", axis=1)

# Join routes with shapes
routes = pd.merge(routes, trips, on="route_id", how="left")
routes = pd.merge(routes, shapes, on="shape_id", how="left")

# To points
geom = gpd.points_from_xy(routes["shape_pt_lon"], routes["shape_pt_lat"], crs=4326)
routes = gpd.GeoDataFrame(data=routes, geometry=geom)
routes = routes.drop(["shape_pt_lon", "shape_pt_lat"], axis=1)

# To lines
routes = routes.groupby("route_id").agg({
    "route_short_name": "first",
    "route_long_name": "first",
    "geometry": lambda x: shapely.geometry.LineString(x.to_list()),
}).reset_index()
routes = gpd.GeoDataFrame(routes, crs=4326)

# Calculate 'delta_lon' and 'delta_lat'
b = routes.bounds
delta_lon = b["maxx"] - b["minx"]
delta_lat = b["maxy"] - b["miny"]

# Subset maximum 'delta_lon' and 'delta_lat'
routes1 = routes.iloc[[delta_lon.idxmax(), delta_lat.idxmax()], :]
routes1

	route_id	route_short_name	route_long_name	geometry
560	26209	400	מלון הוד-תמר<->ת. רכבת להבים רה...	LINESTRING (35.36330 31.20181, ...
310	11718	660	מרכז תפוח-מצפה רמון<->ת. מרכזית...	LINESTRING (34.80221 30.61085, ...

# Plot
routes1.plot(column="route_short_name", cmap="Set1");

_images/exercise_solutions_geopandas1_51_0.png

Spatial Data Programming with Python

From “Vector layers (geopandas)”

Contents

From “Vector layers (`geopandas`)”#

Exercise 08-a#

Exercise 08-b#

Exercise 08-c#

Exercise 08-d#

Spatial Data Programming with Python

From “Vector layers (geopandas)”

Contents

From “Vector layers (geopandas)”#

Exercise 08-a#

Exercise 08-b#

Exercise 08-c#

Exercise 08-d#

From “Vector layers (`geopandas`)”#