Frequently Asked Questions (FAQ)#

Last updated: 2023-02-25 13:39:58

pandas#

Append to CSV if exists#

Sample data#

import pandas as pd
dat = pd.read_csv('output/stations.csv')
dat
name city lines piano lon lat
0 Beer-Sheva Center Beer-Sheva 4 False 34.798443 31.243288
1 Beer-Sheva University Beer-Sheva 5 True 34.812831 31.260284
2 Dimona Dimona 1 False 35.011635 31.068616

Example#

# Delete output file if exists
import os
path_out = 'pandas_append_example.csv'
if os.path.exists(path_out):
    os.remove(path_out)
# Write
for i in range(3):
    if not os.path.exists(path_out):
        dat.to_csv(path_out, index=False)
    else:
        dat.to_csv(path_out, mode='a', header=False, index=False)
# Print output file contents
pd.read_csv(path_out)
name city lines piano lon lat
0 Beer-Sheva Center Beer-Sheva 4 False 34.798443 31.243288
1 Beer-Sheva University Beer-Sheva 5 True 34.812831 31.260284
2 Dimona Dimona 1 False 35.011635 31.068616
3 Beer-Sheva Center Beer-Sheva 4 False 34.798443 31.243288
4 Beer-Sheva University Beer-Sheva 5 True 34.812831 31.260284
5 Dimona Dimona 1 False 35.011635 31.068616
6 Beer-Sheva Center Beer-Sheva 4 False 34.798443 31.243288
7 Beer-Sheva University Beer-Sheva 5 True 34.812831 31.260284
8 Dimona Dimona 1 False 35.011635 31.068616
# Delete output file
if os.path.exists(path_out):
    os.remove(path_out)

Split DataFrame to parts#

Sample data#

import pandas as pd
dat = pd.read_csv('output/stations.csv')
dat
name city lines piano lon lat
0 Beer-Sheva Center Beer-Sheva 4 False 34.798443 31.243288
1 Beer-Sheva University Beer-Sheva 5 True 34.812831 31.260284
2 Dimona Dimona 1 False 35.011635 31.068616

Function definition#

import math
def split_dataframe(df, chunk_size = 1_000_000): 
    chunks = list()
    num_chunks = math.ceil(len(df) / chunk_size)
    for i in range(num_chunks):
        chunks.append(df[i*chunk_size:(i+1)*chunk_size])
    return chunks

Example#

split_dataframe(dat, 2)
[                    name        city  lines  piano        lon        lat
 0      Beer-Sheva Center  Beer-Sheva      4  False  34.798443  31.243288
 1  Beer-Sheva University  Beer-Sheva      5   True  34.812831  31.260284,
      name    city  lines  piano        lon        lat
 2  Dimona  Dimona      1  False  35.011635  31.068616]

References:

Shift column(s) to beginning#

Sample data#

import pandas as pd
dat = pd.read_csv('output/stations.csv')
dat
name city lines piano lon lat
0 Beer-Sheva Center Beer-Sheva 4 False 34.798443 31.243288
1 Beer-Sheva University Beer-Sheva 5 True 34.812831 31.260284
2 Dimona Dimona 1 False 35.011635 31.068616

Example#

cols = ['lon', 'lat']
dat = dat[cols  + [col for col in dat.columns if col not in cols]]
dat
lon lat name city lines piano
0 34.798443 31.243288 Beer-Sheva Center Beer-Sheva 4 False
1 34.812831 31.260284 Beer-Sheva University Beer-Sheva 5 True
2 35.011635 31.068616 Dimona Dimona 1 False

References:

geopandas#

Calculating distances in WGS84#

Question#

How can we calculate distances over large regions given lon/lat points?

Sample data#

Two (lon,lat) points:

pnt1 = (0, 0)
pnt2 = (1, 0)

True distance according to Wikipedia:

dist = 111320

Using the Harvesine formula (less accurate)#

See Example: distance function:

import math
def distance(origin, destination):
    lon1, lat1 = origin
    lon2, lat2 = destination
    radius = 6371000
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat / 2) * math.sin(dlat / 2) +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         math.sin(dlon / 2) * math.sin(dlon / 2))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    d = radius * c
    return d
result = distance(pnt1, pnt2)
result = round(result)
result
111195
dist-result  ## Error of 125 meters
125

Using geodesic distance function from geopy (most accurate)#

See geopy documentation:

import geopy.distance
result = geopy.distance.distance(tuple(reversed(pnt1)), tuple(reversed(pnt2))).meters
result = round(result)
result
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
/tmp/ipykernel_13018/2061613278.py in <module>
----> 1 import geopy.distance
      2 result = geopy.distance.distance(tuple(reversed(pnt1)), tuple(reversed(pnt2))).meters
      3 result = round(result)
      4 result

ModuleNotFoundError: No module named 'geopy'
dist - result  ## Error of 1 meter
1

Beyond distance: The S2 Geometry Library#

The S2 Geometry Library by Google can be used for more complicated calculations in WGS84, such as polygon area. It has a Python interface called s2sphere.

rasterio#

Splitting a raster#

Question#

How can we split a raster into two equal halves, such as east-west or north-south?

Sample data#

import numpy as np
import rasterio
import rasterio.plot
src = rasterio.open('output/carmel2.tif')
rasterio.plot.show(src);
_images/faq_53_0.png

Example#

# Column where we split to east-west
x = round(src.shape[1] / 2)
x
266
# Western part
w1 = rasterio.windows.Window(0, 0, x, src.shape[0])
r1 = src.read(1, window=w1)
w1_transform = src.window_transform(w1)
rasterio.plot.show(r1, transform=w1_transform);
_images/faq_56_0.png
# Eastern part
w2 = rasterio.windows.Window(x, 0, src.shape[1]-x, src.shape[0])
r2 = src.read(1, window=w2)
w2_transform = src.window_transform(w2)
rasterio.plot.show(r2, transform=w2_transform);
_images/faq_57_0.png

arcpy#

Listing all layers on current map#

import arcpy
aprx = arcpy.mp.ArcGISProject("CURRENT")
map = aprx.listMaps()[0]
layers = map.listLayers()
print([i.dataSource for i in layers])
_images/arcpy_list_layers.png

Fig. 84 Listing layers on current map with arcpy#

Transforming ecw to tif#

arcpy.env.workspace = r"\\VBOXSVR\Downloads\ortho_2015"
files = arcpy.ListFiles("*.ecw")
for i in files:
    arcpy.management.CopyRaster(i, i.replace(".ecw", ".tif"), '', None, "256", "NONE", "NONE", '', "NONE", "NONE", "TIFF", "NONE", "CURRENT_SLICE", "NO_TRANSPOSE")