Skip to content

Location updater

Location updater.

Module used to take locations data from a DataFrame object and update that data to provide better information.

load_location_json(file_path)

Load location json.

Opens and returns location object from json file

Parameters:

Name Type Description Default
file_path str

file path string

required

Returns:

Name Type Description
location_data object

location data python object

Source code in report_generator/location_formatter/location_updater.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def load_location_json(file_path: str) -> object:
    """Load location json.

    Reads the UTF-8 encoded file at ``file_path`` and decodes its contents
    as JSON.

    Args:
        file_path (str): path of the json file to read

    Returns:
        location_data (object): the decoded json document as a python object

    """
    with open(file_path, "r", encoding="utf-8") as json_file:
        raw_text = json_file.read()
    return json.loads(raw_text)

load_locations_data()

Load locations data.

Opens json file and loads locations data from json and returns locations object.

Returns:

Name Type Description
locations_data dict

Python dict containing location info

Source code in report_generator/location_formatter/location_updater.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def load_locations_data() -> dict:
    """Load locations data.

    Builds the path ``<dir_path>/data/locations/location_json/location.json``
    from the report generator config, loads that file with
    ``load_location_json`` and logs how long the read took.

    Returns:
        locations_data (dict): Python dict containing location info

    """
    logger.info("Read location json Start")
    started_at = time.time()

    # "dir_path" from the config is the root the data directory hangs off.
    base_dir = report_generator.config.load_config()["dir_path"]
    json_path = os.path.join(
        base_dir, "data", "locations", "location_json", "location.json"
    )
    loaded = load_location_json(json_path)

    process_time_taken = time.time() - started_at
    logger.info(f"Read location json end: {process_time_taken}s")

    return loaded

save_locations_data(locations_data)

Save locations data.

Takes the locations data object and uses json.dumps to write the data to a json file.

Source code in report_generator/location_formatter/location_updater.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
def save_locations_data(locations_data: object) -> None:
    """Save locations data.

    Serialises the locations data object with json.dumps and writes the
    resulting string to ``<dir_path>/data/locations/location_json/location.json``,
    where ``dir_path`` comes from the report generator config. The file is
    opened in write mode, so its previous contents are replaced.

    Args:
        locations_data (object): object containing locations data

    """
    logger.debug("Saving locations data")

    # Serialise before the file is opened: if json.dumps raises, the
    # existing file has not been truncated yet.
    serialised = json.dumps(locations_data)

    base_dir = report_generator.config.load_config()["dir_path"]
    target_path = os.path.join(
        base_dir, "data", "locations", "location_json", "location.json"
    )
    with open(target_path, "w", encoding="utf-8") as json_file:
        json_file.write(serialised)

search_for_unknowns(unknowns)

Search for unknowns.

Takes list of unidentified locations and searches locations database for results matching the unknown location string.

Parameters:

Name Type Description Default
unknowns list

list of unknown location strings.

required

Returns:

Name Type Description
results

list of results from searching locations database

Source code in report_generator/location_formatter/location_updater.py
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def search_for_unknowns(
    unknowns: list, db_path: str = "location_database/location.db"
):
    """Search for unknowns.

    Takes list of unidentified locations and searches the locations database
    for a row matching each unknown location string. The title-cased string is
    compared with SQL ``LIKE`` against ``geocode.place_name`` and, as a prefix
    (``<value>%``), against ``geocode.alternate_names``; at most one row is
    taken per unknown.

    The values are passed to sqlite as bound parameters, so names containing
    quotes (e.g. "Cote d'Ivoire") are searched literally instead of breaking
    the statement.

    If the database raises an error, it is logged and the search stops; the
    results collected up to that point are returned.

    Args:
        unknowns: list of unknown location strings.
        db_path: path of the sqlite locations database. Defaults to the
            relative path "location_database/location.db".

    Returns:
        results: list of results from searching locations database. Each item
            is ``[unknown, Country_Name, Continent_Name, latitude, longitude,
            country_code]``; unknowns without a match are omitted.

    """
    sql = """
        select Country_Name, Continent_Name, latitude, longitude, country_code
        from geocode
        join country_codes on geocode.country_code=country_codes.Two_Letter_Country_Code
        where place_name like ?
        or alternate_names like ? limit 1
        """

    # define search func
    def unknown_sql_query(cursor, unknown: str):
        """Run the lookup for one unknown; return its result row or None."""
        # Timing query
        logger.info("Location unknown value sql query start")
        query_start_time = time.time()

        # NOTE(review): short pause kept from the previous implementation;
        # its purpose is not visible here - confirm whether it is still needed.
        time.sleep(0.01)

        # Bound parameters: exact pattern for place_name, prefix pattern
        # for alternate_names.
        titled = unknown.title()
        cursor.execute(sql, (titled, f"{titled}%"))
        row = cursor.fetchone()
        logger.debug(unknown)

        # Check if result
        if row is None:
            logger.debug("no match")
            found = None
        else:
            found = [unknown, *row]

        # End logging time
        process_time_taken = time.time() - query_start_time
        logger.info(f"Location unknown value sql query end: {process_time_taken}s")
        return found

    logger.info("Search for unknowns Start")
    process_start_time = time.time()
    results = []
    conn = None
    cursor = None
    try:
        for unknown in unknowns:
            if conn is None:
                # Connect lazily so an empty input never touches the database,
                # then share the one connection across all lookups.
                conn = sqlite3.connect(db_path)
                cursor = conn.cursor()
            found = unknown_sql_query(cursor, unknown)
            if found is not None:
                results.append(found)

    except Error as e:
        logger.error(e)

    finally:
        # Always release the connection, including after a database error.
        if conn is not None:
            conn.close()

    process_time_taken = time.time() - process_start_time
    logger.info(f"Search for unknowns end: {process_time_taken}s")

    return results

update_location(data_frame)

Update location.

Takes a pandas data frame object and runs the location finder on each cell of the Location column/series

Updates the data frames values for the Location column then returns the updated data frame.

Parameters:

Name Type Description Default
data_frame pandas.DataFrame

Pandas DataFrame object

required

Returns:

Name Type Description
updated_data_frame pandas.DataFrame

Pandas DataFrame object

Source code in report_generator/location_formatter/location_updater.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def update_location(data_frame: pandas.DataFrame) -> pandas.DataFrame:
    """Update location.

    Loads the locations data, hands it together with the data frame to
    ``update_location_entries`` and logs how long that update took.

    Args:
        data_frame (pandas.DataFrame): Pandas DataFrame object

    Returns:
        updated_data_frame (pandas.DataFrame): the data frame returned by
            ``update_location_entries``

    """
    # The locations data is loaded before the timer starts, so the logged
    # duration covers only the entry update itself.
    locations_data = load_locations_data()

    logger.info("Update Location entries Start")
    started_at = time.time()

    result_frame = update_location_entries(data_frame, locations_data)

    process_time_taken = time.time() - started_at
    logger.info(f"Update Location entries end: {process_time_taken}s")

    return result_frame

update_location_entries(data_frame, LOCATIONS_DATA)

Update location entries.

Takes pandas data frame object and calls lambda function on each entry in 'GeographicRegion' to update the entry. Then returns updated data frame object.

Parameters:

Name Type Description Default
data_frame pandas.DataFrame

Pandas DataFrame object

required
LOCATIONS_DATA object

location data

required

Returns:

Name Type Description
data_frame pandas.DataFrame

Pandas DataFrame object

Source code in report_generator/location_formatter/location_updater.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def update_location_entries(
    data_frame: pandas.DataFrame, LOCATIONS_DATA: object
) -> pandas.DataFrame:
    """Update location entries.

    Passes every value of the 'GeographicRegion' column to
    ``update_locations_unknowns`` to refresh the location data, then formats
    each 'GeographicRegion' value with ``update_location_entry`` and stores
    the result in a 'FormattedGeographicRegion' column.

    The column is added to the given data frame in place; the same data frame
    object is returned.

    Args:
        data_frame (pandas.DataFrame): Pandas DataFrame object
        LOCATIONS_DATA (object) : location data

    Returns:
        data_frame (pandas.DataFrame): Pandas DataFrame object

    """
    region_values = data_frame["GeographicRegion"].values.tolist()
    refreshed_data = update_locations_unknowns(region_values, LOCATIONS_DATA)

    def format_region(region_value):
        # Format one cell against the refreshed location data.
        return update_location_entry(region_value, refreshed_data)

    formatted_column = data_frame["GeographicRegion"].apply(format_region)
    data_frame["FormattedGeographicRegion"] = formatted_column

    return data_frame

update_location_entry(location_str, LOCATIONS_DATA)

Update location entry.

Takes the 'GeographicRegion' cell string value. Splits it into sections. Attempts to transform into a more robust location value

'continent-country-region/continent-country-region/etc'

Parameters:

Name Type Description Default
location_str str

string location value

required
LOCATIONS_DATA object

location data

required

Returns:

Name Type Description
updated_location_str str

string updated location value

Source code in report_generator/location_formatter/location_updater.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def update_location_entry(location_str: str, LOCATIONS_DATA: object) -> str:
    """Update location entry.

    Takes the 'GeographicRegion' cell string value and looks up its locations
    with ``find_location``. The string form of every location found is joined
    with '/' into a single value, intended to look like

        'continent-country-region/continent-country-region/etc'

    (the format of each part is whatever the location objects' string
    conversion produces).

    Args:
        location_str (str): string location value
        LOCATIONS_DATA (object) : location data

    Returns:
        updated_location_str (str): string updated location value

    """
    found_locations = find_location(location_str, LOCATIONS_DATA)
    return "/".join(str(found) for found in found_locations)

update_locations_data(results, locations_data)

Update locations data.

Updates the locations data with results of unknown location search.

Parameters:

Name Type Description Default
results list

list of results from unknown location search

required
locations_data object

Object containing locations data

required
Source code in report_generator/location_formatter/location_updater.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def update_locations_data(results: list, locations_data: object) -> object:
    """Update locations data.

    Updates the locations data with results of unknown location search and
    saves the updated data with ``save_locations_data``.

    Every result is stored under ``locations_data["region"]``, keyed by the
    lower-cased region name; an existing entry with the same key is replaced.
    The save happens on every call, including when ``results`` is empty.

    Args:
        results: list of results from unknown location search. Each result is
            indexed as [region, country full name, continent, latitude,
            longitude, country code].
        locations_data: Object containing locations data. It is modified in
            place and must hold a mapping under the "region" key.

    Returns:
        locations_data: the same object that was passed in, after the update

    """
    regions = locations_data["region"]
    for result in results:
        region_name = result[0]
        country_full_name = result[1]
        logger.debug(region_name)
        regions[region_name.lower()] = {
            "region": region_name,
            # Short country name: the text before the first comma.
            "country": str(country_full_name).split(",")[0],
            "continent": result[2],
            "latitude": result[3],
            "longitude": result[4],
            "country_code": result[5],
            "country_full_name": country_full_name,
        }

    save_locations_data(locations_data)
    return locations_data

update_locations_unknowns(locs, locations_data)

Update unknown locations.

Takes list of unknown locations and passes to the find unknown method. If data is still unknown passes to the search for unknowns method. Takes these results and passes to the update locations data method to update the location data.

Parameters:

Name Type Description Default
locs list

list of locations

required
locations_data object

Object containing location data

required

Returns:

Name Type Description
locations_data object

Object containing location data

Source code in report_generator/location_formatter/location_updater.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
def update_locations_unknowns(locs: list, locations_data: object) -> object:
    """Update unknown locations.

    Runs ``find_unknown`` on every location, de-duplicates the unknown values
    it reports, searches for them with ``search_for_unknowns`` and passes the
    search results to ``update_locations_data`` to update the location data.

    Args:
        locs: list of locations
        locations_data: Object containing location data

    Returns:
        locations_data: Object containing location data

    """
    unknowns_per_location = [find_unknown(loc, locations_data) for loc in locs]

    # Collapse the per-location lists into one collection without duplicates.
    distinct_unknowns = set()
    for unknown_group in unknowns_per_location:
        for unknown in unknown_group:
            distinct_unknowns.add(unknown)

    search_results = search_for_unknowns(list(distinct_unknowns))
    return update_locations_data(search_results, locations_data)