Contents

From “Tables (pandas)”

Contents

From “Tables (`pandas`)”

Last updated: 2023-02-25 13:37:49

Exercise 05-d

Exercise 05-g

import pandas as pd
# Read the students table from the CSV file
students = pd.read_csv("data/students.csv")

# Sum of the 'Ben-Gurion Univ. of the Negev' column (total number of students)
students.loc[:, "Ben-Gurion Univ. of the Negev"].sum()

# Sum of the 'Ariel University' column (total number of students)
students.loc[:, "Ariel University"].sum()

46633.0

# Keep the text before the first "/" of each 'year' value, convert it to a
# number, and add 1
students["year"] = pd.to_numeric(students["year"].str.split("/").str.get(0)).add(1)

# Earliest and latest year present in the table
x = students["year"]
[x.min(), x.max()]

[1970, 2020]

# 'year' value of the row where 'Ben-Gurion Univ. of the Negev' is largest
students.loc[students["Ben-Gurion Univ. of the Negev"].idxmax(), "year"]

# 'year' value of the row where 'Weizmann Institute of Science' is largest
students.loc[students["Weizmann Institute of Science"].idxmax(), "year"]

Exercise 05-h

import numpy as np
import pandas as pd
# Load the Kinneret water-level table and give its columns short names
kinneret = pd.read_csv("data/kinneret_level.csv")
kinneret = kinneret.rename(columns = {"Survey_Date": "date", "Kinneret_Level": "value"})

# The dates are written day-first (DD/MM/YYYY). Without dayfirst=True pandas
# warns that ambiguous strings may be parsed inconsistently (a date such as
# 01/02/2001 could be read month-first), which would misplace measurements
# on the calendar.
# NOTE(review): results derived from this table may differ from outputs that
# were recorded with the default parsing - re-run the dependent cells.
kinneret["date"] = pd.to_datetime(kinneret["date"], dayfirst = True)

# Full daily calendar spanning the first to the last survey date
dates = pd.date_range(kinneret["date"].min(), kinneret["date"].max())

# Index by date, then expand to one row per calendar day; days that have no
# survey record get NaN in 'value'
kinneret = kinneret.set_index("date")
kinneret = kinneret.reindex(dates, fill_value = np.nan)

/tmp/ipykernel_12631/3672199559.py:5: UserWarning: Parsing dates in DD/MM/YYYY format when dayfirst=False (the default) was specified. This may lead to inconsistently parsed dates! Specify a format to ensure consistent parsing.
  kinneret["date"] = pd.to_datetime(kinneret["date"])

# Line plot of the water level against the date index
kinneret["value"].plot.line();

_images/exercise_solutions_pandas1_17_0.png

# Share of calendar days whose water level is missing (NaN)
pd.isna(kinneret["value"]).mean()

0.5348905538416336

# Rows (days) whose water level equals the minimum observed value
kinneret.loc[kinneret["value"].eq(kinneret["value"].min())]

	value
2001-11-25	-214.87
2001-11-26	-214.87
2001-11-27	-214.87
2001-11-28	-214.87
2001-11-29	-214.87

# Length of longest consecutive period without a measurement
counter = 0   # length of the run of missing days currently being scanned
maxcount = 0  # longest run of missing days seen so far
for current in kinneret["value"]:
    if np.isnan(current):
        counter = counter + 1
        # Update the maximum while the run is still growing, so that a run
        # reaching the end of the series is counted too
        if counter > maxcount:
            maxcount = counter
    else:
        # A measurement ends the current run of missing days
        counter = 0
maxcount