{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(assignment-3)=\n",
    "# Home assignment 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Last updated: 2022-05-11 11:29:46\n"
     ]
    }
   ],
   "source": [
    "# Stamp the page with the build time in pure Python, so the cell does not\n",
    "# depend on a POSIX shell providing `echo`, backticks and `date`.\n",
    "from datetime import datetime\n",
    "\n",
    "print(\"Last updated:\", datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9be78d54",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "# Keep rendered tables compact: at most 10 rows and 10 columns, cell text cut at 35 characters\n",
    "pd.set_option(\"display.max_rows\", 10)\n",
    "pd.set_option(\"display.max_columns\", 10)\n",
    "pd.set_option(\"display.max_colwidth\", 35)\n",
    "# Default figure size for every plot in the notebook\n",
    "plt.rcParams[\"figure.figsize\"] = (6, 6)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*****"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Question 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> * Read the `world_cities.csv` file into a `DataFrame` object.\n",
    "> * Calculate and print a new table, where each row represents a *country*, with the following columns:\n",
    ">     * `\"country\"`—Country name\n",
    ">     * `\"capital\"`—The name of the capital city\n",
    ">     * `\"pop_total\"`—The total population (population in all cities summed)\n",
    "> * Note that for some countries there is more than one value marked as the capital! The resulting table still needs to have one row per country: capital name (`\"capital\"`) should be the *first* if there is more than one, while total population (`\"pop_total\"`) needs to be the sum of all cities (regardless of duplicates)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>capital</th>\n",
       "      <th>pop_total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>Kabul</td>\n",
       "      <td>7543856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>Tirana</td>\n",
       "      <td>1536232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>Algiers</td>\n",
       "      <td>20508642</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>American Samoa</td>\n",
       "      <td>Pago Pago</td>\n",
       "      <td>58021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>Andorra la Vella</td>\n",
       "      <td>69031</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223</th>\n",
       "      <td>Wallis and Futuna</td>\n",
       "      <td>Mata'utu</td>\n",
       "      <td>11380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224</th>\n",
       "      <td>Western Sahara</td>\n",
       "      <td>al-'Ayun</td>\n",
       "      <td>338786</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225</th>\n",
       "      <td>Yemen</td>\n",
       "      <td>San'a</td>\n",
       "      <td>5492077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226</th>\n",
       "      <td>Zambia</td>\n",
       "      <td>Lusaka</td>\n",
       "      <td>4032170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>Harare</td>\n",
       "      <td>4231859</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>228 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               country           capital  pop_total\n",
       "0          Afghanistan             Kabul    7543856\n",
       "1              Albania            Tirana    1536232\n",
       "2              Algeria           Algiers   20508642\n",
       "3       American Samoa         Pago Pago      58021\n",
       "4              Andorra  Andorra la Vella      69031\n",
       "..                 ...               ...        ...\n",
       "223  Wallis and Futuna          Mata'utu      11380\n",
       "224     Western Sahara          al-'Ayun     338786\n",
       "225              Yemen             San'a    5492077\n",
       "226             Zambia            Lusaka    4032170\n",
       "227           Zimbabwe            Harare    4231859\n",
       "\n",
       "[228 rows x 3 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load the world cities table\n",
    "dat = pd.read_csv(\"data/world_cities.csv\")\n",
    "\n",
    "# Capital per country: keep the rows flagged as capital and take the first one in each country\n",
    "capitals = (\n",
    "    dat.loc[dat[\"capital\"] == 1, [\"country\", \"city\"]]\n",
    "    .groupby(\"country\")\n",
    "    .first()\n",
    "    .reset_index()\n",
    "    .rename(columns={\"city\": \"capital\"})\n",
    ")\n",
    "\n",
    "# Total population per country: sum of `pop` over all of its rows\n",
    "populations = (\n",
    "    dat[[\"country\", \"pop\"]]\n",
    "    .groupby(\"country\")\n",
    "    .sum()\n",
    "    .reset_index()\n",
    "    .rename(columns={\"pop\": \"pop_total\"})\n",
    ")\n",
    "\n",
    "# Inner join on country, so only countries that have a flagged capital appear\n",
    "result = capitals.merge(populations, on=\"country\")\n",
    "result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Question 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> * The text file named `bgu.wkt` (see {ref}`sample-data`) contains a WKT string representing the geometry of the BGU logo. \n",
    "> * Read the WKT string from the `bgu.wkt` file, using the `open` function and the `.readline` method (see {ref}`working-with-files`). Convert the string into a `shapely` geometry.\n",
    "> * Note: Do not copy and paste the WKT string into your code! You need to read it from the `bgu.wkt` file.\n",
    "> * Display the logo graphically."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [
    {
     "data": {
      "image/svg+xml": "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-0.03654968 0.10526832 0.5952353600000001 0.9868413600000001\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,1.197378)\"><g><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0197368272\" opacity=\"0.6\" d=\"M 0.164551,0.268229 L 0.162651,0.165419 L 0.163411,0.141818 L 0.0,0.141818 L 0.015191,0.155382 L 0.0493164,0.208876 L 0.0709636,0.289497 L 0.0705296,0.376574 L 0.0593533,0.441894 L 0.0534397,0.462999 L 0.0604926,0.457303 L 0.0782335,0.443414 L 0.101617,0.426161 L 0.119358,0.414008 L 0.125488,0.410319 L 0.129666,0.407824 L 0.142524,0.400771 L 0.160157,0.391656 L 0.17399,0.384819 L 0.179579,0.382107 L 0.175672,0.362739 L 0.164551,0.268229 z\" /><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0197368272\" opacity=\"0.6\" d=\"M 0.192219,0.226346 L 0.198133,0.312554 L 0.215603,0.392469 L 0.236708,0.443414 L 0.247017,0.458062 L 0.256565,0.471733 L 0.291124,0.508572 L 0.338596,0.555122 L 0.378256,0.606174 L 0.397299,0.653754 L 0.398981,0.670031 L 0.400987,0.689725 L 0.388672,0.750597 L 0.351292,0.837131 L 0.299804,0.918457 L 0.261339,0.968045 L 0.247288,0.983344 L 0.249132,0.966743 L 0.24235,0.915528 L 0.219021,0.849393 L 0.187011,0.785428 L 0.15625,0.723958 L 0.13661,0.665419 L 0.137912,0.610135 L 0.170085,0.558485 L 0.223469,0.520019 L 0.243001,0.510796 L 0.239746,0.506401 L 0.231879,0.495335 L 0.222168,0.480849 L 0.215332,0.469889 L 0.213271,0.466092 L 0.210992,0.462022 L 0.204969,0.449328 L 0.197428,0.431858 L 0.191949,0.418186 L 0.189887,0.412597 L 0.157986,0.428928 L 0.071289,0.495443 L 0.0250108,0.578831 L 0.0321724,0.662164 L 0.0737847,0.744738 L 0.130968,0.826064 L 0.184733,0.905382 L 0.2162,0.982096 L 0.214356,1.04004 L 0.206435,1.05556 L 0.233724,1.04058 L 0.310167,0.985406 L 0.406467,0.891439 L 0.478136,0.780925 L 
0.507215,0.691135 L 0.508029,0.660915 L 0.508518,0.640788 L 0.488064,0.579753 L 0.437771,0.513943 L 0.378689,0.452854 L 0.340603,0.404243 L 0.332519,0.38596 L 0.324328,0.367567 L 0.307943,0.263726 L 0.326118,0.160157 L 0.335232,0.141818 L 0.195367,0.141818 L 0.193576,0.162869 L 0.192219,0.226346 z\" /><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0197368272\" opacity=\"0.6\" d=\"M 0.378526,0.141818 L 0.366754,0.157607 L 0.345215,0.215115 L 0.339735,0.299914 L 0.36339,0.381293 L 0.400933,0.432781 L 0.417372,0.444282 L 0.490018,0.405382 L 0.478733,0.389594 L 0.45356,0.335829 L 0.442546,0.263781 L 0.463271,0.196669 L 0.505154,0.151639 L 0.522136,0.141818 L 0.378526,0.141818 z\" /></g></g></svg>",
      "text/plain": [
       "<shapely.geometry.multipolygon.MultiPolygon at 0x7f85c24c78b0>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import shapely.geometry\n",
    "import shapely.wkt\n",
    "\n",
    "# Read the WKT text from the file (first line only); the `with` block closes\n",
    "# the file even if reading raises an error.\n",
    "with open(\"data/bgu.wkt\", \"r\", encoding=\"utf-8\") as f:\n",
    "    text = f.readline()\n",
    "\n",
    "# Parse the WKT string into a shapely geometry; leaving it as the last\n",
    "# expression makes the notebook render it graphically.\n",
    "logo = shapely.wkt.loads(text)\n",
    "logo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> * Calculate the area of the logo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.21286694897980007"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Area of the logo geometry, in squared units of the WKT coordinates\n",
    "logo.area"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> * Calculate a circle that encloses the logo. To do that, first calculate the average of the x-axis bounds and the average of the y-axis bounds, then construct a point from those x and y values, and finally buffer the point to a distance of your choice so that the logo is completely within the buffer.\n",
    "> * Calculate the geometry of the *difference* between the logo and the bounding circle you calculated, then *plot* it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [
    {
     "data": {
      "image/svg+xml": "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-0.4949319999999999 -0.15731099999999992 1.512 1.512\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,1.197378)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.03024\" opacity=\"0.6\" d=\"M 0.9576973086705378,0.5300770017693076 L 0.9476176962822613,0.4621257745887103 L 0.9309262350125462,0.39548972592187637 L 0.9077836727579007,0.3308105973444372 L 0.8784128850438486,0.2687112842218017 L 0.8430967286117816,0.2097898368862785 L 0.8021753173539159,0.1546137010854482 L 0.7560427468305833,0.10371425316941685 L 0.7051432989145519,0.05758168264608421 L 0.6499671631137216,0.01666027138821835 L 0.5910457157781985,-0.018655885043848408 L 0.5289464026555628,-0.048026672757900624 L 0.4642672740781236,-0.07116923501254624 L 0.3976312254112898,-0.08786069628226123 L 0.32967999823069255,-0.09794030867053771 L 0.2610680000000001,-0.10131099999999993 L 0.19245600176930758,-0.09794030867053782 L 0.12450477458871029,-0.08786069628226123 L 0.05786872592187653,-0.07116923501254624 L -0.006810402655562764,-0.048026672757900624 L -0.06890971577819832,-0.01865588504384852 L -0.12783116311372134,0.01666027138821824 L -0.1830072989145517,0.0575816826460841 L -0.23390674683058316,0.10371425316941674 L -0.2800393173539158,0.1546137010854482 L -0.32096072861178165,0.2097898368862785 L -0.3562768850438484,0.2687112842218015 L -0.38564767275790063,0.33081059734443713 L -0.40879023501254613,0.39548972592187637 L -0.42548169628226123,0.46212577458871 L -0.4355613086705377,0.5300770017693075 L -0.43893199999999993,0.5986889999999999 L -0.43556130867053783,0.6673009982306924 L -0.42548169628226123,0.7352522254112899 L -0.40879023501254624,0.8018882740781235 L -0.38564767275790074,0.8665674026555628 L -0.3562768850438485,0.9286667157781984 L -0.32096072861178176,0.9875881631137213 L 
-0.2800393173539159,1.0427642989145518 L -0.23390674683058332,1.0936637468305832 L -0.18300729891455209,1.1397963173539156 L -0.1278311631137215,1.1807177286117816 L -0.06890971577819849,1.2160338850438484 L -0.006810402655563208,1.2454046727579007 L 0.057868725921876335,1.2685472350125462 L 0.12450477458870995,1.2852386962822613 L 0.19245600176930772,1.295318308670538 L 0.2610679999999999,1.298689 L 0.32967999823069205,1.295318308670538 L 0.3976312254112898,1.2852386962822613 L 0.46426727407812346,1.2685472350125462 L 0.528946402655563,1.2454046727579007 L 0.5910457157781983,1.2160338850438486 L 0.6499671631137213,1.1807177286117818 L 0.7051432989145519,1.1397963173539158 L 0.7560427468305831,1.0936637468305834 L 0.8021753173539157,1.0427642989145522 L 0.8430967286117816,0.9875881631137216 L 0.8784128850438484,0.9286667157781985 L 0.9077836727579005,0.8665674026555632 L 0.9309262350125462,0.8018882740781238 L 0.9476176962822611,0.7352522254112901 L 0.9576973086705378,0.6673009982306923 L 0.961068,0.598689 L 0.9576973086705378,0.5300770017693076 z M 0.175672,0.362739 L 0.179579,0.382107 L 0.17399,0.384819 L 0.160157,0.391656 L 0.142524,0.400771 L 0.129666,0.407824 L 0.125488,0.410319 L 0.119358,0.414008 L 0.101617,0.426161 L 0.0782335,0.443414 L 0.0604926,0.457303 L 0.0534397,0.462999 L 0.0593533,0.441894 L 0.0705296,0.376574 L 0.0709636,0.289497 L 0.0493164,0.208876 L 0.015191,0.155382 L 0.0,0.141818 L 0.163411,0.141818 L 0.162651,0.165419 L 0.164551,0.268229 L 0.175672,0.362739 z M 0.193576,0.162869 L 0.195367,0.141818 L 0.335232,0.141818 L 0.326118,0.160157 L 0.307943,0.263726 L 0.324328,0.367567 L 0.332519,0.38596 L 0.340603,0.404243 L 0.378689,0.452854 L 0.437771,0.513943 L 0.488064,0.579753 L 0.508518,0.640788 L 0.508029,0.660915 L 0.507215,0.691135 L 0.478136,0.780925 L 0.406467,0.891439 L 0.310167,0.985406 L 0.233724,1.04058 L 0.206435,1.05556 L 0.214356,1.04004 L 0.2162,0.982096 L 0.184733,0.905382 L 0.130968,0.826064 L 0.0737847,0.744738 L 
0.0321724,0.662164 L 0.0250108,0.578831 L 0.071289,0.495443 L 0.157986,0.428928 L 0.189887,0.412597 L 0.191949,0.418186 L 0.197428,0.431858 L 0.204969,0.449328 L 0.210992,0.462022 L 0.213271,0.466092 L 0.215332,0.469889 L 0.222168,0.480849 L 0.231879,0.495335 L 0.239746,0.506401 L 0.243001,0.510796 L 0.223469,0.520019 L 0.170085,0.558485 L 0.137912,0.610135 L 0.13661,0.665419 L 0.15625,0.723958 L 0.187011,0.785428 L 0.219021,0.849393 L 0.24235,0.915528 L 0.249132,0.966743 L 0.247288,0.983344 L 0.261339,0.968045 L 0.299804,0.918457 L 0.351292,0.837131 L 0.388672,0.750597 L 0.400987,0.689725 L 0.398981,0.670031 L 0.397299,0.653754 L 0.378256,0.606174 L 0.338596,0.555122 L 0.291124,0.508572 L 0.256565,0.471733 L 0.247017,0.458062 L 0.236708,0.443414 L 0.215603,0.392469 L 0.198133,0.312554 L 0.192219,0.226346 L 0.193576,0.162869 z M 0.522136,0.141818 L 0.505154,0.151639 L 0.463271,0.196669 L 0.442546,0.263781 L 0.45356,0.335829 L 0.478733,0.389594 L 0.490018,0.405382 L 0.417372,0.444282 L 0.400933,0.432781 L 0.36339,0.381293 L 0.339735,0.299914 L 0.345215,0.215115 L 0.366754,0.157607 L 0.378526,0.141818 L 0.522136,0.141818 z\" /></g></svg>",
      "text/plain": [
       "<shapely.geometry.polygon.Polygon at 0x7f85c24d22e0>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Center of the logo's bounding box: midpoints of the x bounds and of the y bounds\n",
    "bb = logo.bounds  # (xmin, ymin, xmax, ymax)\n",
    "x = (bb[0] + bb[2]) / 2\n",
    "y = (bb[1] + bb[3]) / 2\n",
    "\n",
    "# Build the point directly from the coordinates instead of pasting them into a WKT string\n",
    "pnt = shapely.geometry.Point(x, y)\n",
    "\n",
    "# Buffer distance picked by hand; the assertion checks that the circle really encloses the logo\n",
    "circ = pnt.buffer(0.7)\n",
    "assert circ.contains(logo), \"buffer distance is too small to enclose the logo\"\n",
    "\n",
    "# Circle with the logo cut out of it\n",
    "circ.difference(logo)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}