{ "cells": [ { "cell_type": "markdown", "id": "dee9ac98", "metadata": { "scrolled": true }, "source": [ "# Converting postcode to area\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "d4426ad1", "metadata": { "tags": [ "remove-cell" ] }, "outputs": [], "source": [ "import sys\n", "MAIN_PATH = \"C:\\\\Users\\\\44781\\\\Documents\\\\GitHub\\\\Pesticide\"\n", "sys.path.append(MAIN_PATH)" ] }, { "cell_type": "markdown", "id": "e3095b2f", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 2, "id": "4a74c8fe", "metadata": { "tags": [ "hide-cell" ] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sample_iddate_of_samplingdescriptioncountry_of_originretail_outletaddressbrand_namepacker_/_manufacturer_/_importerpesticide_residues_found_in_mg/kg_(mrl)productaddress_postcodepacker__postcodechem_nameamount_detectedmrlamount_pc
01958/20162016-08-08Bramley ApplesUKAsdaCreechbarrow Road, Taunton TA1 2ANAsdaAsda Stores Ltd Leeds, UK LS11 5ADboscalid 0.03 (MRL = 2)AppleTA1 2ANLS11 5ADboscalid0.032.00.015000
11958/20162016-08-08Bramley ApplesUKAsdaCreechbarrow Road, Taunton TA1 2ANAsdaAsda Stores Ltd Leeds, UK LS11 5ADpyraclostrobin 0.01 (MRL = 0.5)AppleTA1 2ANLS11 5ADpyraclostrobin0.010.50.020000
20230/20162016-08-08Bramley ApplesUKCo-opNorthgate, Louth LN11 0LTCo-opCo-operative Group Ltd Manchester M60 0AGboscalid 0.05 (MRL = 2)AppleLN11 0LTM60 0AGboscalid0.052.00.025000
30230/20162016-08-08Bramley ApplesUKCo-opNorthgate, Louth LN11 0LTCo-opCo-operative Group Ltd Manchester M60 0AGflonicamid (sum) 0.02 (MRL = 0.2)AppleLN11 0LTM60 0AGflonicamid (sum)0.020.20.100000
40230/20162016-08-08Bramley ApplesUKCo-opNorthgate, Louth LN11 0LTCo-opCo-operative Group Ltd Manchester M60 0AGpyraclostrobin 0.03 (MRL = 0.5)AppleLN11 0LTM60 0AGpyraclostrobin0.030.50.060000
...................................................
239470792/20222022-09-08Marfona Unwashed PotatoesUKStoke Fruit Farm77 Havant Road, Hayling Island, Hampshire PO11...None statedStoke Fruit Farm 77 Havant Road, Hayling Islan...maleic hydrazide 7.7 (MRL = 60)Potatoes_(GB)PO11 0PTPO11 0PTmaleic hydrazide7.7060.00.128333
239480765/20222022-09-14Sagitta PotatoesUKTotal Produce Ltd.Callywith Gate Industrial Estate, Launceston R...None statedCS Duston & Son Colwith Farm, Par, Cornwallmaleic hydrazide 17 (MRL = 60)Potatoes_(GB)PL31 2RQ0maleic hydrazide17.0060.00.283333
239490938/20222022-09-01Marfona Ware PotatoesUKTulipland Potatoes LtdFengate, Moulton Chapel PE12 0XLNone statedGarden of ElvedenNaNPotatoes_(GB)PE12 0XL000.000.00.000000
239500833/20222022-08-31Lady RosettaEngland (UK)Tyrrells CrispsTyrrells Court, Stretford Bridge, Leominster H...None statedKP CrispsNaNPotatoes_(GB)HR6 9DQ000.000.00.000000
239510833/20222022-08-31Lady RosettaEngland (UK)Tyrrells CrispsTyrrells Court, Stretford Bridge, Leominster H...None statedKP CrispsNaNPotatoes_(GB)HR6 9DQ000.000.00.000000
\n", "

23952 rows × 16 columns

\n", "
" ], "text/plain": [ " sample_id date_of_sampling description \\\n", "0 1958/2016 2016-08-08 Bramley Apples \n", "1 1958/2016 2016-08-08 Bramley Apples \n", "2 0230/2016 2016-08-08 Bramley Apples \n", "3 0230/2016 2016-08-08 Bramley Apples \n", "4 0230/2016 2016-08-08 Bramley Apples \n", "... ... ... ... \n", "23947 0792/2022 2022-09-08 Marfona Unwashed Potatoes \n", "23948 0765/2022 2022-09-14 Sagitta Potatoes \n", "23949 0938/2022 2022-09-01 Marfona Ware Potatoes \n", "23950 0833/2022 2022-08-31 Lady Rosetta \n", "23951 0833/2022 2022-08-31 Lady Rosetta \n", "\n", " country_of_origin retail_outlet \\\n", "0 UK Asda \n", "1 UK Asda \n", "2 UK Co-op \n", "3 UK Co-op \n", "4 UK Co-op \n", "... ... ... \n", "23947 UK Stoke Fruit Farm \n", "23948 UK Total Produce Ltd. \n", "23949 UK Tulipland Potatoes Ltd \n", "23950 England (UK) Tyrrells Crisps \n", "23951 England (UK) Tyrrells Crisps \n", "\n", " address brand_name \\\n", "0 Creechbarrow Road, Taunton TA1 2AN Asda \n", "1 Creechbarrow Road, Taunton TA1 2AN Asda \n", "2 Northgate, Louth LN11 0LT Co-op \n", "3 Northgate, Louth LN11 0LT Co-op \n", "4 Northgate, Louth LN11 0LT Co-op \n", "... ... ... \n", "23947 77 Havant Road, Hayling Island, Hampshire PO11... None stated \n", "23948 Callywith Gate Industrial Estate, Launceston R... None stated \n", "23949 Fengate, Moulton Chapel PE12 0XL None stated \n", "23950 Tyrrells Court, Stretford Bridge, Leominster H... None stated \n", "23951 Tyrrells Court, Stretford Bridge, Leominster H... None stated \n", "\n", " packer_/_manufacturer_/_importer \\\n", "0 Asda Stores Ltd Leeds, UK LS11 5AD \n", "1 Asda Stores Ltd Leeds, UK LS11 5AD \n", "2 Co-operative Group Ltd Manchester M60 0AG \n", "3 Co-operative Group Ltd Manchester M60 0AG \n", "4 Co-operative Group Ltd Manchester M60 0AG \n", "... ... \n", "23947 Stoke Fruit Farm 77 Havant Road, Hayling Islan... \n", "23948 CS Duston & Son Colwith Farm, Par, Cornwall \n", "23949 Garden of Elveden \n", "23950 KP Crisps \n", "23951 KP Crisps \n", "\n", " pesticide_residues_found_in_mg/kg_(mrl) product address_postcode \\\n", "0 boscalid 0.03 (MRL = 2) Apple TA1 2AN \n", "1 pyraclostrobin 0.01 (MRL = 0.5) Apple TA1 2AN \n", "2 boscalid 0.05 (MRL = 2) Apple LN11 0LT \n", "3 flonicamid (sum) 0.02 (MRL = 0.2) Apple LN11 0LT \n", "4 pyraclostrobin 0.03 (MRL = 0.5) Apple LN11 0LT \n", "... ... ... ... \n", "23947 maleic hydrazide 7.7 (MRL = 60) Potatoes_(GB) PO11 0PT \n", "23948 maleic hydrazide 17 (MRL = 60) Potatoes_(GB) PL31 2RQ \n", "23949 NaN Potatoes_(GB) PE12 0XL \n", "23950 NaN Potatoes_(GB) HR6 9DQ \n", "23951 NaN Potatoes_(GB) HR6 9DQ \n", "\n", " packer__postcode chem_name amount_detected mrl amount_pc \n", "0 LS11 5AD boscalid 0.03 2.0 0.015000 \n", "1 LS11 5AD pyraclostrobin 0.01 0.5 0.020000 \n", "2 M60 0AG boscalid 0.05 2.0 0.025000 \n", "3 M60 0AG flonicamid (sum) 0.02 0.2 0.100000 \n", "4 M60 0AG pyraclostrobin 0.03 0.5 0.060000 \n", "... ... ... ... ... ... \n", "23947 PO11 0PT maleic hydrazide 7.70 60.0 0.128333 \n", "23948 0 maleic hydrazide 17.00 60.0 0.283333 \n", "23949 0 0 0.00 0.0 0.000000 \n", "23950 0 0 0.00 0.0 0.000000 \n", "23951 0 0 0.00 0.0 0.000000 \n", "\n", "[23952 rows x 16 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "import pandas as pd\n", "import copy\n", "import src.data_loading.loads_from_url as lfu\n", "import src.utils.map_utils as mpu\n", "from src.data_loading.loads_from_url import get_poscode_df\n", "\n", "data_file = os.path.join(MAIN_PATH, 'src','utils', 'map_data','combined_data.csv')\n", "df = pd.read_csv(data_file,index_col=0)\n", "df\n" ] }, { "cell_type": "markdown", "id": "1b98c274", "metadata": {}, "source": [ "## Load the poscode data\n", "\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "dd584299", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1874663\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PostcodemapArea
0AL1 1AGHertfordshire
1AL1 1AJHertfordshire
2AL1 1ARHertfordshire
3AL1 1ASHertfordshire
4AL1 1ATHertfordshire
\n", "
" ], "text/plain": [ " Postcode mapArea\n", "0 AL1 1AG Hertfordshire\n", "1 AL1 1AJ Hertfordshire\n", "2 AL1 1AR Hertfordshire\n", "3 AL1 1AS Hertfordshire\n", "4 AL1 1AT Hertfordshire" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "map_data_file = os.path.join(MAIN_PATH, \"src\", \"utils\", \"map_data\", \"postcode_to_region.csv\")\n", "postcodes_df = lfu.get_poscode_df(path_to_csv = map_data_file)\n", "\n", "print(len(postcodes_df))\n", "postcodes_df.head()\n" ] }, { "cell_type": "markdown", "id": "4a65eea5", "metadata": {}, "source": [ "## Convert the postcode to area" ] }, { "cell_type": "code", "execution_count": 4, "id": "d0ced57e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sample_iddate_of_samplingdescriptioncountry_of_originretail_outletaddressbrand_namepacker_/_manufacturer_/_importerpesticide_residues_found_in_mg/kg_(mrl)productaddress_postcodepacker__postcodechem_nameamount_detectedmrlamount_pcarea_to_plot
01958/20162016-08-08Bramley ApplesUKAsdaCreechbarrow Road, Taunton TA1 2ANAsdaAsda Stores Ltd Leeds, UK LS11 5ADboscalid 0.03 (MRL = 2)AppleTA1 2ANLS11 5ADboscalid0.032.00.015000Somerset
11958/20162016-08-08Bramley ApplesUKAsdaCreechbarrow Road, Taunton TA1 2ANAsdaAsda Stores Ltd Leeds, UK LS11 5ADpyraclostrobin 0.01 (MRL = 0.5)AppleTA1 2ANLS11 5ADpyraclostrobin0.010.50.020000Somerset
20230/20162016-08-08Bramley ApplesUKCo-opNorthgate, Louth LN11 0LTCo-opCo-operative Group Ltd Manchester M60 0AGboscalid 0.05 (MRL = 2)AppleLN11 0LTM60 0AGboscalid0.052.00.025000Lincolnshire
30230/20162016-08-08Bramley ApplesUKCo-opNorthgate, Louth LN11 0LTCo-opCo-operative Group Ltd Manchester M60 0AGflonicamid (sum) 0.02 (MRL = 0.2)AppleLN11 0LTM60 0AGflonicamid (sum)0.020.20.100000Lincolnshire
40230/20162016-08-08Bramley ApplesUKCo-opNorthgate, Louth LN11 0LTCo-opCo-operative Group Ltd Manchester M60 0AGpyraclostrobin 0.03 (MRL = 0.5)AppleLN11 0LTM60 0AGpyraclostrobin0.030.50.060000Lincolnshire
......................................................
239470792/20222022-09-08Marfona Unwashed PotatoesUKStoke Fruit Farm77 Havant Road, Hayling Island, Hampshire PO11...None statedStoke Fruit Farm 77 Havant Road, Hayling Islan...maleic hydrazide 7.7 (MRL = 60)Potatoes_(GB)PO11 0PTPO11 0PTmaleic hydrazide7.7060.00.128333Hampshire
239480765/20222022-09-14Sagitta PotatoesUKTotal Produce Ltd.Callywith Gate Industrial Estate, Launceston R...None statedCS Duston & Son Colwith Farm, Par, Cornwallmaleic hydrazide 17 (MRL = 60)Potatoes_(GB)PL31 2RQ0maleic hydrazide17.0060.00.283333Cornwall
239490938/20222022-09-01Marfona Ware PotatoesUKTulipland Potatoes LtdFengate, Moulton Chapel PE12 0XLNone statedGarden of ElvedenNaNPotatoes_(GB)PE12 0XL000.000.00.000000Lincolnshire
239500833/20222022-08-31Lady RosettaEngland (UK)Tyrrells CrispsTyrrells Court, Stretford Bridge, Leominster H...None statedKP CrispsNaNPotatoes_(GB)HR6 9DQ000.000.00.000000Herefordshire
239510833/20222022-08-31Lady RosettaEngland (UK)Tyrrells CrispsTyrrells Court, Stretford Bridge, Leominster H...None statedKP CrispsNaNPotatoes_(GB)HR6 9DQ000.000.00.000000Herefordshire
\n", "

23952 rows × 17 columns

\n", "
" ], "text/plain": [ " sample_id date_of_sampling description \\\n", "0 1958/2016 2016-08-08 Bramley Apples \n", "1 1958/2016 2016-08-08 Bramley Apples \n", "2 0230/2016 2016-08-08 Bramley Apples \n", "3 0230/2016 2016-08-08 Bramley Apples \n", "4 0230/2016 2016-08-08 Bramley Apples \n", "... ... ... ... \n", "23947 0792/2022 2022-09-08 Marfona Unwashed Potatoes \n", "23948 0765/2022 2022-09-14 Sagitta Potatoes \n", "23949 0938/2022 2022-09-01 Marfona Ware Potatoes \n", "23950 0833/2022 2022-08-31 Lady Rosetta \n", "23951 0833/2022 2022-08-31 Lady Rosetta \n", "\n", " country_of_origin retail_outlet \\\n", "0 UK Asda \n", "1 UK Asda \n", "2 UK Co-op \n", "3 UK Co-op \n", "4 UK Co-op \n", "... ... ... \n", "23947 UK Stoke Fruit Farm \n", "23948 UK Total Produce Ltd. \n", "23949 UK Tulipland Potatoes Ltd \n", "23950 England (UK) Tyrrells Crisps \n", "23951 England (UK) Tyrrells Crisps \n", "\n", " address brand_name \\\n", "0 Creechbarrow Road, Taunton TA1 2AN Asda \n", "1 Creechbarrow Road, Taunton TA1 2AN Asda \n", "2 Northgate, Louth LN11 0LT Co-op \n", "3 Northgate, Louth LN11 0LT Co-op \n", "4 Northgate, Louth LN11 0LT Co-op \n", "... ... ... \n", "23947 77 Havant Road, Hayling Island, Hampshire PO11... None stated \n", "23948 Callywith Gate Industrial Estate, Launceston R... None stated \n", "23949 Fengate, Moulton Chapel PE12 0XL None stated \n", "23950 Tyrrells Court, Stretford Bridge, Leominster H... None stated \n", "23951 Tyrrells Court, Stretford Bridge, Leominster H... None stated \n", "\n", " packer_/_manufacturer_/_importer \\\n", "0 Asda Stores Ltd Leeds, UK LS11 5AD \n", "1 Asda Stores Ltd Leeds, UK LS11 5AD \n", "2 Co-operative Group Ltd Manchester M60 0AG \n", "3 Co-operative Group Ltd Manchester M60 0AG \n", "4 Co-operative Group Ltd Manchester M60 0AG \n", "... ... \n", "23947 Stoke Fruit Farm 77 Havant Road, Hayling Islan... \n", "23948 CS Duston & Son Colwith Farm, Par, Cornwall \n", "23949 Garden of Elveden \n", "23950 KP Crisps \n", "23951 KP Crisps \n", "\n", " pesticide_residues_found_in_mg/kg_(mrl) product address_postcode \\\n", "0 boscalid 0.03 (MRL = 2) Apple TA1 2AN \n", "1 pyraclostrobin 0.01 (MRL = 0.5) Apple TA1 2AN \n", "2 boscalid 0.05 (MRL = 2) Apple LN11 0LT \n", "3 flonicamid (sum) 0.02 (MRL = 0.2) Apple LN11 0LT \n", "4 pyraclostrobin 0.03 (MRL = 0.5) Apple LN11 0LT \n", "... ... ... ... \n", "23947 maleic hydrazide 7.7 (MRL = 60) Potatoes_(GB) PO11 0PT \n", "23948 maleic hydrazide 17 (MRL = 60) Potatoes_(GB) PL31 2RQ \n", "23949 NaN Potatoes_(GB) PE12 0XL \n", "23950 NaN Potatoes_(GB) HR6 9DQ \n", "23951 NaN Potatoes_(GB) HR6 9DQ \n", "\n", " packer__postcode chem_name amount_detected mrl amount_pc \\\n", "0 LS11 5AD boscalid 0.03 2.0 0.015000 \n", "1 LS11 5AD pyraclostrobin 0.01 0.5 0.020000 \n", "2 M60 0AG boscalid 0.05 2.0 0.025000 \n", "3 M60 0AG flonicamid (sum) 0.02 0.2 0.100000 \n", "4 M60 0AG pyraclostrobin 0.03 0.5 0.060000 \n", "... ... ... ... ... ... \n", "23947 PO11 0PT maleic hydrazide 7.70 60.0 0.128333 \n", "23948 0 maleic hydrazide 17.00 60.0 0.283333 \n", "23949 0 0 0.00 0.0 0.000000 \n", "23950 0 0 0.00 0.0 0.000000 \n", "23951 0 0 0.00 0.0 0.000000 \n", "\n", " area_to_plot \n", "0 Somerset \n", "1 Somerset \n", "2 Lincolnshire \n", "3 Lincolnshire \n", "4 Lincolnshire \n", "... ... \n", "23947 Hampshire \n", "23948 Cornwall \n", "23949 Lincolnshire \n", "23950 Herefordshire \n", "23951 Herefordshire \n", "\n", "[23952 rows x 17 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def create_area_col(df, postcodes_df, postcode_column):\n", " postcode_dict = dict(postcodes_df.values)\n", " \n", " df['area_to_plot'] = df.loc[:,postcode_column].map(postcode_dict)\n", " return df\n", "\n", "postcode_column = 'address_postcode'\n", "df = create_area_col(df, postcodes_df, postcode_column)\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "id": "59b8bb00", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number postcodes not found 1435 or 6.0%, not including 213 with no postcode\n" ] } ], "source": [ "def how_many_failed(df):\n", " tot_len = len(df)\n", " bad_len = len(df.loc[df.area_to_plot.isna()])\n", " bad_pcode = len(df.loc[df[postcode_column]=='0'])\n", " bad_len = bad_len - bad_pcode\n", " \n", " print(f\"Number postcodes not found {bad_len} or {100*bad_len/tot_len:.1f}%,\\\n", " not including {bad_pcode} with no postcode\")\n", "\n", "\n", "how_many_failed(df)" ] }, { "cell_type": "markdown", "id": "da587501", "metadata": {}, "source": [ "## Reduce the length of the postcode\n", "\n", "6% or over 1,000 postcodes were not found. Which is a lot. Maybe a better postcode database is needed.\n", "\n", "But another way to find more postcode areas is to use less of the postcode\n", "\n", "For example for `AL1 1AG`\n", "- try `AL1 1A`\n", "- if still lots missing try `AL1 1`\n", "- and then `AL1`\n", "\n", "For these smaller post-code areas they can be in more than one plot area. A quick way to choose which area to use is the area which has the most postcodes. Maybe a better way would be to look for postcodes near in the alphabet, but this way is simpler.\n", "\n", "So for example, in the below if a postcode AL1 1AA is unknown. Because all the other postcodes AL1 1A are Hertfordshire it will assign that unkown postcode to Hertfordshire. If the areas are split (e.g. AL1 1AS was in West Wales but the rest the same) it would assign the unknown postcode AL1 1AA to Hertfordshire.\n", " " ] }, { "cell_type": "code", "execution_count": 6, "id": "9225f7e9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PostcodemapArea
0AL1 1AGHertfordshire
1AL1 1AJHertfordshire
2AL1 1ARHertfordshire
3AL1 1ASHertfordshire
4AL1 1ATHertfordshire
5AL1 1AUHertfordshire
6AL1 1AWHertfordshire
\n", "
" ], "text/plain": [ " Postcode mapArea\n", "0 AL1 1AG Hertfordshire\n", "1 AL1 1AJ Hertfordshire\n", "2 AL1 1AR Hertfordshire\n", "3 AL1 1AS Hertfordshire\n", "4 AL1 1AT Hertfordshire\n", "5 AL1 1AU Hertfordshire\n", "6 AL1 1AW Hertfordshire" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "postcodes_df.loc[postcodes_df['Postcode'].apply(lambda x: x[:-1])=='AL1 1A']\n" ] }, { "cell_type": "markdown", "id": "3cb10ce2", "metadata": {}, "source": [ "### It works but a tad convoluted\n", "\n", "\n", "Create a list " ] }, { "cell_type": "code", "execution_count": 7, "id": "754421e2", "metadata": {}, "outputs": [], "source": [ "def get_postcode_list(postcodes_df=postcodes_df,\n", " usecols=['Postcode','mapArea'],\n", " reduce_p_max=6\n", " ):\n", " \"\"\"\n", " loads poscode data and returns that as a list of dataframes\n", " where postcode has has been reduced by 1:reduce_p_max-1 in length\n", " \n", " Args: path_to_csv (path) csv file of postcode data\n", " usecols (list[str]) what columns to load, also includes latitude,longitude\n", " reduce_p_max (int) number of chars to take off end of potscode -1\n", " Returns: \n", " list_postcodes_df (list[pd.DataFrame]) list of pandas dataframe of the postcode data\n", " \n", " \"\"\" \n", "\n", " postcodes_df_temp = postcodes_df.copy()\n", " \n", " list_postcodes_df = []\n", " list_postcodes_df.append( postcodes_df_temp.copy() )\n", " \n", " for i in range(1,reduce_p_max):\n", " colname = f\"Postcode_m{i}\"\n", " \n", " if i!=0:\n", " postcodes_df_temp[colname] = postcodes_df_temp['Postcode'].str[:-i] \n", " \n", " df = _getpc(postcodes_df_temp, colname)\n", " df = df.rename(columns={colname:'Postcode'})\n", " list_postcodes_df.append( df )\n", " \n", " \n", " return list_postcodes_df\n", "\n", "def _getpc(postcodes_df_temp, colname):\n", " \"\"\"\n", " Helper function to identify the region when a postcode is across multiple regions\n", " Does this by selecting region that has most postcodes\n", " e.g. BH1 8L is in two areas (A and B), area A is in 3 times and B in 5 times, \n", " so B is selected as the area\n", " Args: postcodes_df_temp (pd.DataFrame) dataframe of postcode data\n", " colname (str) what column to process some variant of postcode\n", " Returns: \n", " postcode_short (pd.DataFrame) modified dataframe with new variable mapArea\n", " \"\"\"\n", " postcodes_df_temp = postcodes_df_temp.groupby(by=[colname,'mapArea'], as_index=False).count()\n", "\n", " postcode_short = postcodes_df_temp.sort_values('Postcode', ascending=False).drop_duplicates([colname])\n", " \n", " postcode_short = postcode_short.loc[:,[colname,'mapArea']].reset_index(drop=True)\n", " return postcode_short\n", "\n", "shorter_pc_list = get_postcode_list()\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "50f3889f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PostcodemapArea
0AB2North East Scotland
1AB4North East Scotland
2NP4South East Wales
3CR0Greater London
4NP9South East Wales
\n", "
" ], "text/plain": [ " Postcode mapArea\n", "0 AB2 North East Scotland\n", "1 AB4 North East Scotland\n", "2 NP4 South East Wales\n", "3 CR0 Greater London\n", "4 NP9 South East Wales" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print( len(shorter_pc_list) )\n", "shorter_pc_list[3].head()" ] }, { "cell_type": "code", "execution_count": 9, "id": "22f615be", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "For postcode smaller by 0 characters\n", "Number postcodes not found 1435 or 6.0%, not including 213 with no postcode\n", "\n", "For postcode smaller by 1 characters\n", "Number postcodes not found 695 or 2.9%, not including 213 with no postcode\n", "\n", "For postcode smaller by 2 characters\n", "Number postcodes not found 213 or 0.9%, not including 213 with no postcode\n", "\n", "For postcode smaller by 3 characters\n", "Number postcodes not found 95 or 0.4%, not including 213 with no postcode\n", "\n", "For postcode smaller by 4 characters\n", "Number postcodes not found 95 or 0.4%, not including 213 with no postcode\n", "\n", "For postcode smaller by 5 characters\n", "Number postcodes not found 92 or 0.4%, not including 213 with no postcode\n" ] } ], "source": [ "dfnew= copy.deepcopy(df)\n", "dfnew = dfnew.loc[:,['address','address_postcode']]\n", "\n", "for removeChar in range(0,6):\n", " # create a new column\n", " \n", " if removeChar!=0:\n", " postcode_columnNEW =postcode_column+f'{removeChar}'\n", " dfnew[postcode_columnNEW]=df[postcode_column].str[:-removeChar]\n", " else:\n", " postcode_columnNEW=postcode_column\n", " dfnew = create_area_col(dfnew, \n", " shorter_pc_list[removeChar], postcode_columnNEW)\n", " print()\n", " print(f'For postcode smaller by {removeChar} characters')\n", " how_many_failed(dfnew)" ] }, { "cell_type": "markdown", "id": "3ec907b1", "metadata": {}, "source": [ "## Update postcode data\n", "\n", "So if we take away 3 characters (for example, AL1 1AG -> AL1) 99.6% of the postcodes are found.\n", "\n", "Now we need to add the postcodes which didn't appear in teh original postcode dataset to that dataset with the new areas found.\n", "\n" ] }, { "cell_type": "code", "execution_count": 10, "id": "761f5e9d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "After initial mapping\n", " The length of postcode mapper is 1874663 \n", " and 147 still unknonwn\n", "\n", ">> Post-code length reduced by 1\n", "Number of new postcodes added 80\n", "The length of postcode mapper is 1874743 \n", "and 67 still unknonwn\n", "\n", ">> Post-code length reduced by 2\n", "Number of new postcodes added 18\n", "The length of postcode mapper is 1874761 \n", "and 49 still unknonwn\n", "\n", ">> Post-code length reduced by 3\n", "Number of new postcodes added 27\n", "The length of postcode mapper is 1874788 \n", "and 22 still unknonwn\n", "\n" ] } ], "source": [ "def _create_area_col(df, postcodes_df, postcode_column):\n", " df_ =copy.deepcopy(df)\n", " postcode_dict = dict(postcodes_df[['Postcode','mapArea']].values)\n", " \n", " df_['mapArea'] = df_.loc[:,postcode_column].map(postcode_dict)\n", " return df_['mapArea']\n", "\n", "def dothis(df, MAIN_PATH, shorter_pc_list,postcode_column = 'address_postcode'):\n", "\n", " # load postcode to area df\n", " map_data_file = os.path.join(MAIN_PATH, \"src\", \"utils\", \"map_data\", \"postcode_to_region.csv\")\n", " postcodes_to_area_df = lfu.get_poscode_df(path_to_csv = map_data_file)\n", " postcodes_to_area_df['removeChar']=0\n", " \n", " \n", " df_pc= copy.deepcopy(df)\n", "\n", " # get the unique post-codes and ignore ones that don't exist ='0'\n", " df_pc =pd.DataFrame(data=df[postcode_column].unique(),columns=['Postcode'])\n", " df_pc = df_pc.loc[df_pc.Postcode!='0']\n", "\n", " postcode_column = 'Postcode'\n", "\n", " # check if already in the postcode to area df\n", " # just use the ones that are not\n", " df_pc['mapArea'] = _create_area_col(df_pc, postcodes_to_area_df, 'Postcode')\n", " df_pc = df_pc.loc[df_pc['mapArea'].isna()]\n", "\n", " print(f\"After initial mapping\\n \\\n", "The length of postcode mapper is {len(postcodes_to_area_df)} \\n \\\n", "and {len(df_pc)} still unknonwn\\n\")\n", " \n", " for removeChar in range(1,4):\n", " print('>> Post-code length reduced by ',removeChar)\n", " # remove 1 char from the postcode\n", " df_pc[postcode_column]=df_pc[postcode_column].str[:-1]\n", "\n", " df_pc['mapArea'] = _create_area_col(df_pc, \n", " shorter_pc_list[removeChar], postcode_column)\n", "\n", " \n", " # add new values to the dictionary\n", " df_add = copy.deepcopy(df_pc[['Postcode','mapArea']].loc[df_pc['mapArea'].notna()])\n", " df_add['removeChar'] = removeChar\n", " \n", " \n", " postcodes_to_area_df = pd.concat([ postcodes_to_area_df, df_add ]) \n", " print(f\"Number of new postcodes added {len(df_add)}\")\n", "\n", " # just use postcodes not found in next iteration \n", " df_pc = copy.deepcopy(df_pc.loc[df_pc['mapArea'].isna()])\n", " print(f\"The length of postcode mapper is {len(postcodes_to_area_df)} \\n\\\n", "and {len(df_pc)} still unknonwn\")\n", " \n", " print()\n", " \n", " return postcodes_to_area_df, df_add, df_pc\n", " \n", "newPC, df_add, df_pc = dothis(df, MAIN_PATH, shorter_pc_list)" ] }, { "cell_type": "code", "execution_count": 5, "id": "9c8d853a", "metadata": { "tags": [ "remove-cell" ] }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# postcode_column = 'address_postcode'\n", "# map_data_file = os.path.join(MAIN_PATH, \"src\", \"utils\", \"map_data\", \"postcode_to_region.csv\")\n", "# postcodes_to_area_df = mpu.get_poscode_df(path_to_csv = map_data_file)\n", "\n", "# df_pc= copy.deepcopy(df)\n", "\n", "# # get the unique post-codes and ignore ones that don't exist ='0'\n", "# df_pc =pd.DataFrame(data=df[postcode_column].unique(),columns=['Postcode'])\n", "# df_pc = df_pc.loc[df_pc.Postcode!='0']\n", "\n", "# postcode_column = 'Postcode'\n", "\n", "# # check if already in the postcode to area df\n", "# # just use the ones that are not\n", "# df_pc = _create_area_col(df_pc, postcodes_to_area_df, 'Postcode')\n", "# df_pc = df_pc.loc[df_pc['mapArea'].isna()]\n", "\n", "# removeChar = 2\n", "# df_pc[postcode_column]=df_pc[postcode_column].str[:-removeChar]\n", "\n", "# dfnew = _create_area_col(df_pc, \n", "# shorter_pc_list[removeChar], postcode_column)\n", "\n", "# # add new values to the dictionary\n", "# df_add = copy.deepcopy(dfnew[['Postcode','mapArea']].loc[dfnew['mapArea'].notna()])\n", "# postcodes_to_area_df = pd.concat([ postcodes_to_area_df, df_add ]) \n", "\n", "\n", "# # just use postcodes not found in next iteration \n", "# df_pc = dfnew.loc[dfnew['mapArea'].isna()].copy()\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "id": "9db76a0b", "metadata": {}, "source": [ "[Go back](https://thomashsimm.github.io/PesticideDocs/CombiningMaps.html)\n", "\n", "[First page](https://thomashsimm.github.io/PesticideDocs/UK_areas.html)" ] } ], "metadata": { "celltoolbar": "Tags", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "vscode": { "interpreter": { "hash": "5ca7ab3073cce3b29d80f881db5d07487222bdafec031d83c06f5ba7dfb5c682" } } }, "nbformat": 4, "nbformat_minor": 5 }