From “Vector layers (geopandas)”

From “Vector layers (geopandas)”

Last updated: 2022-06-22 18:50:27

Exercise 08-a

import geopandas as gpd
/home/michael/.local/lib/python3.8/site-packages/geopandas/_compat.py:111: UserWarning: The Shapely GEOS version (3.8.0-CAPI-1.13.1 ) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.
  warnings.warn(
# Read and subset
stat = gpd.read_file("data/statisticalareas_demography2019.gdb")
stat = stat[["YISHUV_STAT11", "SHEM_YISHUV", "SHEM_YISHUV_ENGLISH", "Pop_Total", "geometry"]]
# Examine CRS
stat.crs
<Derived Projected CRS: EPSG:2039>
Name: Israel 1993 / Israeli TM Grid
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Israel - onshore; Palestine Territory - onshore.
- bounds: (34.17, 29.45, 35.69, 33.28)
Coordinate Operation:
- name: Israeli TM
- method: Transverse Mercator
Datum: Israel 1993
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich
# Feature count
stat.shape[0]
3195
# Are there any duplicates in the 'YISHUV_STAT11' column?
stat["YISHUV_STAT11"].duplicated().any()
False
# Plot
stat.plot(edgecolor="black", color="none");
_images/exercise_solutions_geopandas1_11_0.png
# Subset 'Beer-Sheva'
sel = stat["SHEM_YISHUV"] == "באר שבע"
stat1 = stat[sel]
# Plot
stat1.plot(column="Pop_Total", edgecolor="black", cmap="Reds", legend=True);
_images/exercise_solutions_geopandas1_14_0.png
# Feature count
stat1.shape[0]
62
# Sum of population
stat1["Pop_Total"].sum()
209685.0

Exercise 08-b

import pandas as pd
import geopandas as gpd
import shapely.geometry
# Read
agency = pd.read_csv("data/gtfs/agency.txt")
routes = pd.read_csv("data/gtfs/routes.txt")
trips = pd.read_csv("data/gtfs/trips.txt")
shapes = pd.read_csv("data/gtfs/shapes.txt")
# Subset
agency = agency[["agency_id", "agency_name"]]
routes = routes[["route_id", "agency_id", "route_short_name", "route_long_name"]]
trips = trips[["trip_id", "route_id", "shape_id"]]
shapes = shapes[["shape_id", "shape_pt_sequence", "shape_pt_lon", "shape_pt_lat"]]
# Subset agency
agency = agency[agency["agency_name"] == "דן באר שבע"]
routes = routes[routes["agency_id"] == agency["agency_id"].iloc[0]]
routes = routes.drop("agency_id", axis=1)
# Drop duplicate trips
trips = trips.drop_duplicates(subset="route_id")
trips = trips.drop("trip_id", axis=1)
# Join routes with shapes
routes = pd.merge(routes, trips, on="route_id", how="left")
routes = pd.merge(routes, shapes, on="shape_id", how="left")
# To points
geom = gpd.points_from_xy(routes["shape_pt_lon"], routes["shape_pt_lat"], crs=4326)
routes = gpd.GeoDataFrame(data=routes, geometry=geom)
routes = routes.drop(["shape_pt_lon", "shape_pt_lat"], axis=1)
# To lines
routes = routes.groupby("route_id").agg({
    "route_short_name": "first",
    "route_long_name": "first",
    "geometry": lambda x: shapely.geometry.LineString(x.to_list()),
}).reset_index()
routes = gpd.GeoDataFrame(routes, crs=4326)
routes
route_id route_short_name route_long_name geometry
0 142 שד. הצנחנים / צבי בן יעקב-באר ש... LINESTRING (34.77787 31.22497, ...
1 143 מסוף רמות-באר שבע<->שד. הצנחנים... LINESTRING (34.82207 31.28003, ...
2 178 14א ת. רכבת ב''ש צפון-באר שבע<->שד.... LINESTRING (34.80885 31.26133, ...
3 180 14א שד. הצנחנים / צבי בן יעקב-באר ש... LINESTRING (34.77787 31.22497, ...
4 185 16א כפר האירוסים/דרך אילן רמון-באר ... LINESTRING (34.75704 31.25477, ...
... ... ... ... ...
66 27981 27 ת.מרכזית/עירוניים לדרום-באר שבע... LINESTRING (34.79791 31.24329, ...
67 27982 27א הברזל/החורש-באר שבע<->מכללה טכנ... LINESTRING (34.80872 31.21809, ...
68 27983 27 מכללה טכנולוגית חיל האוויר/דרך ... LINESTRING (34.75665 31.25459, ...
69 27984 27א מכללה טכנולוגית חיל האוויר/דרך ... LINESTRING (34.75665 31.25459, ...
70 28740 18א ת.מרכזית/עירוניים לדרום-באר שבע... LINESTRING (34.79791 31.24329, ...

71 rows × 4 columns

# Calculate start points
pnt = [shapely.geometry.Point(line.coords[0]) for line in routes["geometry"].to_list()]
pnt = gpd.GeoSeries(pnt, crs=4326)
pnt
0     POINT (34.77787 31.22497)
1     POINT (34.82207 31.28003)
2     POINT (34.80885 31.26133)
3     POINT (34.77787 31.22497)
4     POINT (34.75704 31.25477)
                ...            
66    POINT (34.79791 31.24329)
67    POINT (34.80872 31.21809)
68    POINT (34.75665 31.25459)
69    POINT (34.75665 31.25459)
70    POINT (34.79791 31.24329)
Length: 71, dtype: geometry
# Plot
base = routes.plot(zorder=1)
pnt.plot(ax=base, color="red", zorder=2);
_images/exercise_solutions_geopandas1_28_0.png

Exercise 08-c

import geopandas as gpd

# Read
stat = gpd.read_file("data/statisticalareas_demography2019.gdb")

# Subset columns
sel = ["SHEM_YISHUV", "age_0_4", "age_5_9", "age_10_14", "age_15_19", "Pop_Total"]
stat = stat[sel]
stat
SHEM_YISHUV age_0_4 age_5_9 age_10_14 age_15_19 Pop_Total
0 המעפיל 72.0 89.0 109.0 73.0 869.0
1 משגב עם 34.0 15.0 19.0 26.0 339.0
2 גאולים 77.0 86.0 92.0 69.0 968.0
3 להבות הבשן 56.0 78.0 99.0 87.0 915.0
4 מכמורת 116.0 130.0 121.0 79.0 1403.0
... ... ... ... ... ... ...
3190 קריית נטפים 165.0 165.0 151.0 67.0 958.0
3191 דולב 215.0 190.0 174.0 143.0 1448.0
3192 עתניאל 115.0 108.0 118.0 158.0 1044.0
3193 יצהר 412.0 357.0 240.0 81.0 1726.0
3194 מבואות יריחו NaN NaN NaN NaN NaN

3195 rows × 6 columns

# Replace "No Data" with zero
sel = ["age_0_4", "age_5_9", "age_10_14", "age_15_19", "Pop_Total"]
stat[sel] = stat[sel].fillna(0)
stat
SHEM_YISHUV age_0_4 age_5_9 age_10_14 age_15_19 Pop_Total
0 המעפיל 72.0 89.0 109.0 73.0 869.0
1 משגב עם 34.0 15.0 19.0 26.0 339.0
2 גאולים 77.0 86.0 92.0 69.0 968.0
3 להבות הבשן 56.0 78.0 99.0 87.0 915.0
4 מכמורת 116.0 130.0 121.0 79.0 1403.0
... ... ... ... ... ... ...
3190 קריית נטפים 165.0 165.0 151.0 67.0 958.0
3191 דולב 215.0 190.0 174.0 143.0 1448.0
3192 עתניאל 115.0 108.0 118.0 158.0 1044.0
3193 יצהר 412.0 357.0 240.0 81.0 1726.0
3194 מבואות יריחו 0.0 0.0 0.0 0.0 0.0

3195 rows × 6 columns

# Subset towns with total population >5000
sel = stat["Pop_Total"] > 5000
stat = stat[sel].copy()
stat
SHEM_YISHUV age_0_4 age_5_9 age_10_14 age_15_19 Pop_Total
28 ירושלים 2579.0 2594.0 2431.0 2411.0 23581.0
29 ירושלים 3161.0 2997.0 2815.0 2786.0 22258.0
30 ירושלים 1714.0 1585.0 1373.0 1364.0 11336.0
31 ירושלים 2469.0 2436.0 2369.0 2378.0 19591.0
32 ירושלים 3393.0 3293.0 3172.0 3267.0 29503.0
... ... ... ... ... ... ...
3133 רעננה 797.0 668.0 527.0 411.0 6978.0
3136 רעננה 471.0 561.0 619.0 489.0 6680.0
3142 שפרעם 3506.0 3431.0 3637.0 4298.0 42137.0
3152 טמרה 3155.0 3206.0 3451.0 3700.0 34392.0
3172 באר שבע 437.0 400.0 384.0 393.0 5205.0

471 rows × 6 columns

# Calculate total population in ages 0-19
stat["Pop_Total_0_19"] = stat["age_0_4"] + stat["age_5_9"] + stat["age_10_14"] + stat["age_15_19"]
stat
SHEM_YISHUV age_0_4 age_5_9 age_10_14 age_15_19 Pop_Total Pop_Total_0_19
28 ירושלים 2579.0 2594.0 2431.0 2411.0 23581.0 10015.0
29 ירושלים 3161.0 2997.0 2815.0 2786.0 22258.0 11759.0
30 ירושלים 1714.0 1585.0 1373.0 1364.0 11336.0 6036.0
31 ירושלים 2469.0 2436.0 2369.0 2378.0 19591.0 9652.0
32 ירושלים 3393.0 3293.0 3172.0 3267.0 29503.0 13125.0
... ... ... ... ... ... ... ...
3133 רעננה 797.0 668.0 527.0 411.0 6978.0 2403.0
3136 רעננה 471.0 561.0 619.0 489.0 6680.0 2140.0
3142 שפרעם 3506.0 3431.0 3637.0 4298.0 42137.0 14872.0
3152 טמרה 3155.0 3206.0 3451.0 3700.0 34392.0 13512.0
3172 באר שבע 437.0 400.0 384.0 393.0 5205.0 1614.0

471 rows × 7 columns

# Aggregate
stat = stat.groupby("SHEM_YISHUV").sum().reset_index()
stat
SHEM_YISHUV age_0_4 age_5_9 age_10_14 age_15_19 Pop_Total Pop_Total_0_19
0 אבו גוש 784.0 720.0 740.0 717.0 7698.0 2961.0
1 אבו סנאן 1281.0 1148.0 1229.0 1428.0 14099.0 5086.0
2 אבן יהודה 926.0 1335.0 1482.0 1158.0 13826.0 4901.0
3 אום אל-פחם 5639.0 5587.0 5955.0 6367.0 56109.0 23548.0
4 אופקים 3112.0 2519.0 1901.0 1489.0 21793.0 9021.0
... ... ... ... ... ... ... ...
174 שערי תקווה 564.0 645.0 638.0 481.0 6039.0 2328.0
175 שפרעם 3506.0 3431.0 3637.0 4298.0 42137.0 14872.0
176 תל אביב -יפו 7217.0 6461.0 5473.0 4941.0 89404.0 24092.0
177 תל מונד 881.0 1202.0 1371.0 1346.0 13144.0 4800.0
178 תל שבע 3536.0 3083.0 2780.0 2770.0 20808.0 12169.0

179 rows × 7 columns

# Calculate ratio
stat["ratio"] = stat["Pop_Total_0_19"] / stat["Pop_Total"]
stat
SHEM_YISHUV age_0_4 age_5_9 age_10_14 age_15_19 Pop_Total Pop_Total_0_19 ratio
0 אבו גוש 784.0 720.0 740.0 717.0 7698.0 2961.0 0.384645
1 אבו סנאן 1281.0 1148.0 1229.0 1428.0 14099.0 5086.0 0.360735
2 אבן יהודה 926.0 1335.0 1482.0 1158.0 13826.0 4901.0 0.354477
3 אום אל-פחם 5639.0 5587.0 5955.0 6367.0 56109.0 23548.0 0.419683
4 אופקים 3112.0 2519.0 1901.0 1489.0 21793.0 9021.0 0.413940
... ... ... ... ... ... ... ... ...
174 שערי תקווה 564.0 645.0 638.0 481.0 6039.0 2328.0 0.385494
175 שפרעם 3506.0 3431.0 3637.0 4298.0 42137.0 14872.0 0.352944
176 תל אביב -יפו 7217.0 6461.0 5473.0 4941.0 89404.0 24092.0 0.269473
177 תל מונד 881.0 1202.0 1371.0 1346.0 13144.0 4800.0 0.365186
178 תל שבע 3536.0 3083.0 2780.0 2770.0 20808.0 12169.0 0.584823

179 rows × 8 columns

# Classify and sort
stat["type"] = "old"
sel = stat["ratio"] >= stat["ratio"].mean()
stat.loc[sel, "type"] = "young"
stat.sort_values(by="ratio")
SHEM_YISHUV age_0_4 age_5_9 age_10_14 age_15_19 Pop_Total Pop_Total_0_19 ratio type
149 קריית ביאליק 696.0 646.0 634.0 635.0 10980.0 2611.0 0.237796 old
118 נשר 459.0 491.0 446.0 403.0 7123.0 1799.0 0.252562 old
32 בני עי"ש 518.0 460.0 389.0 424.0 6978.0 1791.0 0.256664 old
164 רמת גן 4106.0 3764.0 3511.0 3077.0 53747.0 14458.0 0.269001 old
176 תל אביב -יפו 7217.0 6461.0 5473.0 4941.0 89404.0 24092.0 0.269473 old
... ... ... ... ... ... ... ... ... ...
63 חורה 4087.0 3553.0 3001.0 2799.0 22337.0 13440.0 0.601692 young
29 בית שמש 18999.0 17515.0 14383.0 9976.0 100881.0 60873.0 0.603414 young
10 אלעד 7271.0 8064.0 8142.0 6457.0 48764.0 29934.0 0.613854 young
30 ביתר עילית 9963.0 8915.0 7437.0 4291.0 46889.0 30606.0 0.652733 young
105 מודיעין עילית 14246.0 13303.0 9782.0 5517.0 62919.0 42848.0 0.681003 young

179 rows × 9 columns

Exercise 08-d

import pandas as pd
import geopandas as gpd
import shapely.geometry
# Read
agency = pd.read_csv("data/gtfs/agency.txt")
routes = pd.read_csv("data/gtfs/routes.txt")
trips = pd.read_csv("data/gtfs/trips.txt")
shapes = pd.read_csv("data/gtfs/shapes.txt")
# Subset
agency = agency[["agency_id", "agency_name"]]
routes = routes[["route_id", "agency_id", "route_short_name", "route_long_name"]]
trips = trips[["trip_id", "route_id", "shape_id"]]
shapes = shapes[["shape_id", "shape_pt_sequence", "shape_pt_lon", "shape_pt_lat"]]
# Subset agency
agency = agency[agency["agency_name"] == "מטרופולין"]
routes = routes[routes["agency_id"] == agency["agency_id"].iloc[0]]
routes = routes.drop("agency_id", axis=1)
# Drop duplicate trips
trips = trips.drop_duplicates(subset="route_id")
trips = trips.drop("trip_id", axis=1)
# Join routes with shapes
routes = pd.merge(routes, trips, on="route_id", how="left")
routes = pd.merge(routes, shapes, on="shape_id", how="left")
# To points
geom = gpd.points_from_xy(routes["shape_pt_lon"], routes["shape_pt_lat"], crs=4326)
routes = gpd.GeoDataFrame(data=routes, geometry=geom)
routes = routes.drop(["shape_pt_lon", "shape_pt_lat"], axis=1)
# To lines
routes = routes.groupby("route_id").agg({
    "route_short_name": "first",
    "route_long_name": "first",
    "geometry": lambda x: shapely.geometry.LineString(x.to_list()),
}).reset_index()
routes = gpd.GeoDataFrame(routes, crs=4326)
# Calculate 'delta_lon' and 'delta_lat'
b = routes.bounds
delta_lon = b["maxx"] - b["minx"]
delta_lat = b["maxy"] - b["miny"]
# Subset maximum 'delta_lon' and 'delta_lat'
routes1 = routes.iloc[[delta_lon.idxmax(), delta_lat.idxmax()], :]
routes1
route_id route_short_name route_long_name geometry
560 26209 400 מלון הוד-תמר<->ת. רכבת להבים רה... LINESTRING (35.36330 31.20181, ...
310 11718 660 מרכז תפוח-מצפה רמון<->ת. מרכזית... LINESTRING (34.80221 30.61085, ...
# Plot
routes1.plot(column="route_short_name", cmap="Set1");
_images/exercise_solutions_geopandas1_51_0.png