Skip to content

Location finder

Locations finder.

Methods used to break down location strings and categorise the locations into continents, countries, regions and unknown locations.

find_location(location_str, locations_data)

Find location.

Takes a location string and splits it into sections. Attempts to determine what kind of location (continent, country, region) each section is.

Creates list of Location objects based on results.

Parameters:

Name Type Description Default
location_str(str)

string value of GeographicRegion cell

required
locations_data(object)

location data

required

Returns:

Name Type Description
location_objs list

list of Location objects

Source code in report_generator/location_formatter/location_finder.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def find_location(location_str: str, locations_data: object) -> list:
    """Find location.

    Takes a location string and splits it into sections. Attempts
    to determine what kind of location (continent, country, region) each
    section is.

    Creates list of Location objects based on results.

    Args:
        location_str(str): string value of GeographicRegion cell. ``None``
            and values whose string form is "nan" are treated as an empty
            string; any other non-string value is converted with ``str()``.
        locations_data(object): location data. It is indexed with the keys
            "continent", "country" and "region"; each entry is looked up by
            the cleaned, lower-cased section text.

    Returns:
        location_objs(list): list of Location objects, ordered as all
            continents, then countries, then regions, then unknown sections
            (unknown sections are stored in the ``region`` field).

    Raises:
        ValueError: if ``locations_data`` is None.

    """
    # logger
    logger.debug("Location finder func Start")
    process_start_time = time.time()

    if locations_data is None:
        logger.error("Locations data is None")
        # ValueError is a subclass of Exception, so existing
        # ``except Exception`` handlers keep working.
        raise ValueError("Locations data is None")

    logger.debug(f"Sections: {location_str}")

    # Handling nan/null dataset error (empty cells arrive as NaN or None)
    if location_str is None or str(location_str) == "nan":
        location_str = ""

    # Split location string into component sections.
    # The delimiters are , / ( ) * +  -- this is exactly what the previous
    # pattern "[,/()-+]" matched, because ")-+" inside a character class is
    # a range covering ")", "*" and "+".
    # NOTE(review): "-" is therefore NOT a delimiter, so hyphenated names
    # stay in one section; confirm that is the intended behaviour.
    sections = re.split(r"[,/()*+]", str(location_str).lower().strip())

    # Shorthand for the Americas that appears after splitting on "/" or ","
    america_aliases = {
        "central": "central america",
        "north": "north america",
        "south": "south america",
    }

    continent_data = locations_data["continent"]
    country_data = locations_data["country"]
    region_data = locations_data["region"]

    # lists for each kind of section
    continents_list = []
    countries_list = []
    regions_list = []
    unknown_list = []

    # Iterate through the sections to try and determine what kind of
    # location they are
    for section in sections:
        # Data cleaning: surrounding whitespace, full stops and double quotes
        section = section.strip().strip(".").strip('"').strip()

        # Handle common America issue
        section = america_aliases.get(section, section)

        # Continents take priority over countries, countries over regions
        if section in continent_data:
            continents_list.append(continent_data[section])
        elif section in country_data:
            countries_list.append(country_data[section])
        elif section in region_data:
            regions_list.append(region_data[section])
        else:
            unknown_list.append(section)

    # Create the Location objects, grouped by kind
    locations = []

    for continent in continents_list:
        locations.append(
            Location(
                continent=continent["continent"],
                latitude=continent["latitude"],
                longitude=continent["longitude"],
            )
        )

    for country in countries_list:
        locations.append(
            Location(
                country["continent"],
                country["country"],
                latitude=country["latitude"],
                longitude=country["longitude"],
                country_code=country["country_code"],
            )
        )

    for region in regions_list:
        locations.append(
            Location(
                region["continent"],
                region["country"],
                region["region"],
                latitude=region["latitude"],
                longitude=region["longitude"],
                country_code=region["country_code"],
            )
        )

    for unknown in unknown_list:
        locations.append(Location(region=unknown))

    # Log time taken
    process_time_taken = time.time() - process_start_time
    logger.debug(f"Location finder func end: {process_time_taken}s")

    return locations

find_unknown(location_str, locations_data)

Find unknown location.

Takes a location string and splits it into sections. Finds the sections that are not known continents, countries or regions.

Creates list based on results.

Parameters:

Name Type Description Default
location_str(str)

string value of GeographicRegion cell

required
locations_data(object)

location data

required

Returns:

Name Type Description
unknown_list list

list of section strings that matched no known continent, country or region

Source code in report_generator/location_formatter/location_finder.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def find_unknown(location_str: str, locations_data: object) -> list:
    """Find unknown location.

    Takes a location string and splits it into sections, cleaning each
    section the same way as ``find_location`` so both functions agree on
    which sections are recognised. Collects the sections that are not a
    known continent, country or region.

    Args:
        location_str(str): string value of GeographicRegion cell. ``None``
            and values whose string form is "nan" are treated as an empty
            string; any other non-string value is converted with ``str()``.
        locations_data(object): location data. It is indexed with the keys
            "continent", "country" and "region"; membership is tested with
            the cleaned, lower-cased section text.

    Returns:
        unknown_list(list): list of section strings (not Location objects)
            that matched none of the three lookups, in input order. Empty
            sections are not filtered out, so an empty input yields a single
            empty string unless "" is a key in the data.

    Raises:
        ValueError: if ``locations_data`` is None.

    """
    if locations_data is None:
        # ValueError is a subclass of Exception, so existing
        # ``except Exception`` handlers keep working.
        raise ValueError("Locations data is None")
    # logger
    logger.debug("unknown finder func Start")
    process_start_time = time.time()

    # Handling nan/null dataset error (empty cells arrive as NaN or None)
    if location_str is None or str(location_str) == "nan":
        location_str = ""

    # Split location string into component sections.
    # The delimiters are , / ( ) * +  -- this is exactly what the previous
    # pattern "[,/()-+]" matched, because ")-+" inside a character class is
    # a range covering ")", "*" and "+".
    # NOTE(review): "-" is therefore NOT a delimiter, so hyphenated names
    # stay in one section; confirm that is the intended behaviour.
    sections = re.split(r"[,/()*+]", str(location_str).lower().strip())

    # Shorthand for the Americas that appears after splitting on "/" or ","
    america_aliases = {
        "central": "central america",
        "north": "north america",
        "south": "south america",
    }

    continent_data = locations_data["continent"]
    country_data = locations_data["country"]
    region_data = locations_data["region"]

    # Unknown list
    unknown_list = []

    for section in sections:
        # Data cleaning: surrounding whitespace, full stops and double quotes
        section = section.strip().strip(".").strip('"').strip()

        # Handle common America issue
        section = america_aliases.get(section, section)

        # A section is unknown only if it is absent from all three lookups
        if (
            section not in continent_data
            and section not in country_data
            and section not in region_data
        ):
            unknown_list.append(section)

    # Log time taken
    process_time_taken = time.time() - process_start_time
    logger.debug(f"unknown finder func end: {process_time_taken}s")

    return unknown_list