Skip to content

Locations data setup

Location data setup module.

Module contains the following functions

insert_default_data, copy_files_from_directory, location_data_setup, location_data_cleanup, download_location_data_file, extract_location_data_file, split_location_data_file

copy_files_from_directory(old_directory, new_directory)

Copy a directory tree.

Source code in report_generator/project_setup/locations_data_setup.py
110
111
112
def copy_files_from_directory(old_directory: str, new_directory: str) -> None:
    """Recursively copy a directory tree.

    Delegates to ``shutil.copytree``, so the whole content of
    ``old_directory`` is copied into ``new_directory``.

    Args:
        old_directory (str): Path of the directory to copy from.
        new_directory (str): Path of the directory to copy into.
    """
    shutil.copytree(src=old_directory, dst=new_directory)

default_data_setup(dir_path, data_path)

Sets up the project's default data.

Downloads the default data file from the git repository and extracts it into the project's data directory.

Parameters:

Name Type Description Default
dir_path str

The path to the project directory

required
data_path str

The path to the data directory

required
Source code in report_generator/project_setup/locations_data_setup.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def default_data_setup(dir_path: str, data_path: str) -> None:
    """Set up the project's default data.

    Downloads the default data archive into the project directory and
    then extracts it into the project's data directory.

    Args:
        dir_path (str):     The path to the project directory; the
                            archive is expected there as ``data.zip``.
        data_path (str):    The path to the data directory that the
                            archive is extracted into.
    """
    logger.debug("Downloading default data files.")
    download_default_data(dir_path)
    logger.debug("Extracting default data files")
    # Must match the file name that download_default_data writes.
    data_zip_path = os.path.join(dir_path, "data.zip")
    extract_default_data(data_zip_path, data_path)

download_default_data(project_dir)

Download the default project data.

Downloads the default data zip file from the project git repository.

Parameters:

Name Type Description Default
project_dir str

The project directory for the files to be downloaded into.

required
Source code in report_generator/project_setup/locations_data_setup.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def download_default_data(project_dir: str) -> None:
    """Download the default project data.

    Streams the default data zip file from the project git repository
    and saves it as ``data.zip`` inside ``project_dir``.

    Args:
        project_dir (str): The project directory for the files
                           to be downloaded into.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    logger.info("Downloading default data file")
    default_data_url = (
        "https://github.com/ccushnahan/report_generator/raw/main/data.zip"
    )
    with requests.get(default_data_url, stream=True) as response:
        file_path = os.path.join(project_dir, "data.zip")
        response.raise_for_status()
        # Content-Length is optional (e.g. chunked responses). tqdm accepts
        # total=None and then shows a running count instead of a bar.
        content_length = response.headers.get("Content-Length")
        total = int(content_length) if content_length is not None else None
        # Both the file and the progress bar are context-managed so they are
        # closed even when the transfer fails part-way through.
        with open(file_path, "wb") as file, tqdm.tqdm(total=total) as progress_bar:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
                progress_bar.update(len(chunk))

download_location_data_file(location_dir)

Download location data files.

Download the location data file from the GeoNames website.

Source code in report_generator/project_setup/locations_data_setup.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def download_location_data_file(location_dir: str) -> None:
    """Download location data files.

    Streams ``allCountries.zip`` from the GeoNames download server and
    saves it as ``all_countries.zip`` inside ``location_dir``.

    Args:
        location_dir (str): Directory the archive is written into.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    logger.info("Downloading location data file")
    location_file_url = "https://download.geonames.org/export/dump/allCountries.zip"
    with requests.get(location_file_url, stream=True) as response:
        file_path = os.path.join(location_dir, "all_countries.zip")
        response.raise_for_status()
        # Content-Length is optional (e.g. chunked responses). tqdm accepts
        # total=None and then shows a running count instead of a bar.
        content_length = response.headers.get("Content-Length")
        total = int(content_length) if content_length is not None else None
        # Both the file and the progress bar are context-managed so they are
        # closed even when the transfer fails part-way through.
        with open(file_path, "wb") as file, tqdm.tqdm(total=total) as progress_bar:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
                progress_bar.update(len(chunk))

extract_default_data(data_zip_path, data_path)

Unzips default data file.

Takes the default data zip file and uses zipfile to extract it to the project's data directory.

Parameters:

Name Type Description Default
data_zip_path str

Path to the default data zip file.

required
data_path str

Path to the project's data directory

required
Source code in report_generator/project_setup/locations_data_setup.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def extract_default_data(data_zip_path: str, data_path: str) -> None:
    """Unpack the default data archive.

    Opens the zip file at ``data_zip_path`` and extracts its members one
    at a time into ``data_path`` while showing an "Extracting" progress
    bar.

    Args:
        data_zip_path (str):    Path to the default data zip file.
        data_path (str):        Path to the project's data directory.
    """
    with zipfile.ZipFile(data_zip_path) as archive:
        entries = archive.infolist()
        for entry in tqdm.tqdm(entries, desc="Extracting"):
            archive.extract(entry, path=data_path)

extract_location_data_file(new_locations_path, location_data_zip_path)

Extract location data.

Source code in report_generator/project_setup/locations_data_setup.py
158
159
160
161
162
163
164
165
166
def extract_location_data_file(
    new_locations_path: str, location_data_zip_path: str
) -> None:
    """Unzip the location data archive.

    Args:
        new_locations_path (str):       Directory the archive members are
                                        extracted into.
        location_data_zip_path (str):   Path to the location data zip file.
    """
    logger.info("Unzipping Locations Data File")

    with zipfile.ZipFile(location_data_zip_path) as archive:
        entries = archive.infolist()
        for entry in tqdm.tqdm(entries, desc="Extracting"):
            archive.extract(entry, path=new_locations_path)

insert_default_data(dir_path)

Insert default data.

Inserts default data files into the data folder by downloading and extracting the default data archive, then runs the location data setup.

Parameters:

Name Type Description Default
dir_path str

String path to the project directory.

required
Source code in report_generator/project_setup/locations_data_setup.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def insert_default_data(dir_path: str) -> None:
    """Insert default data.

    Populates ``<dir_path>/data`` by downloading and extracting the
    default data archive, then runs the location data setup against
    ``<dir_path>/data/locations``.

    Args:
        dir_path (str): String path to the project directory.

    """
    logger.info("Inserting Default Data")
    data_path = os.path.join(dir_path, "data")
    logger.debug(data_path)
    new_locations_path = os.path.join(data_path, "locations")

    default_data_setup(dir_path, data_path)
    location_data_setup(new_locations_path)

location_data_cleanup(locations_path, locations_data_file_path)

Cleanup unneeded location data.

Source code in report_generator/project_setup/locations_data_setup.py
129
130
131
132
133
134
135
136
137
def location_data_cleanup(locations_path: str, locations_data_file_path: str) -> None:
    """Cleanup unneeded location data.

    Deletes the downloaded location archive and the ``allCountries.txt``
    file found in ``locations_path``.

    Args:
        locations_path (str):           Directory containing
                                        ``allCountries.txt``.
        locations_data_file_path (str): Path to the location data zip file.

    Raises:
        FileNotFoundError: If either file does not exist.
    """
    logger.info("Cleaning up data.")
    zip_path = locations_data_file_path
    txt_path = os.path.join(locations_path, "allCountries.txt")

    os.remove(zip_path)
    os.remove(txt_path)
    logger.info("Clean up complete.")

location_data_setup(new_locations_path)

Location Data Setup.

Source code in report_generator/project_setup/locations_data_setup.py
115
116
117
118
119
120
121
122
123
124
125
126
def location_data_setup(new_locations_path: str) -> None:
    """Run the complete location data pipeline.

    Downloads the location archive into ``new_locations_path``, extracts
    it, splits the extracted file into smaller files and finally cleans
    up the archive and the extracted text file.

    Args:
        new_locations_path (str): Directory that holds the location data.
    """
    logger.info("Starting Location data setup")
    download_location_data_file(new_locations_path)

    # Must match the file name that download_location_data_file writes.
    archive_path = os.path.join(new_locations_path, "all_countries.zip")
    extract_location_data_file(new_locations_path, archive_path)
    split_location_data_file(new_locations_path)
    location_data_cleanup(new_locations_path, archive_path)

    logger.info("Location data setup complete")

split_location_data_file(locations_path)

Split location data into smaller files.

Source code in report_generator/project_setup/locations_data_setup.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
def split_location_data_file(locations_path: str, lines_per_file: int = 5000) -> None:
    """Split location data into smaller files.

    Reads ``allCountries.txt`` from ``locations_path`` and writes it out in
    chunks of ``lines_per_file`` lines to
    ``<locations_path>/csv_files/split_csv/small_csv_file_<N>.csv``, where
    ``<N>`` is the index of the chunk's first line plus ``lines_per_file``.
    Every chunk starts with the tab-separated column header.

    The output directory is created if missing. Existing chunk files with
    the same name are overwritten, so the function can be re-run safely.

    Args:
        locations_path (str): Directory containing ``allCountries.txt``.
        lines_per_file (int): Maximum number of data lines per chunk file.

    Raises:
        ValueError: If ``lines_per_file`` is not a positive integer.
        FileNotFoundError: If ``allCountries.txt`` does not exist.
    """
    if lines_per_file < 1:
        raise ValueError("lines_per_file must be a positive integer")

    # Csv header
    columns = (
        "geoname_id",
        "place_name",
        "ascii_name",
        "alternate_names",
        "latitude",
        "longitude",
        "feature_class",
        "feature_code",
        "country_code",
        "cc2",
        "admin1_code",
        "admin2_code",
        "admin3_code",
        "admin4_code",
        "population_info",
        "elevation",
        "dem",
        "timezone",
        "modification",
    )
    file_header = "\t".join(columns) + "\n"

    file_location = os.path.join(locations_path, "allCountries.txt")
    split_dir = os.path.join(locations_path, "csv_files", "split_csv")

    logger.info("Splitting csv into chunks:")
    os.makedirs(split_dir, exist_ok=True)

    smallfile = None
    try:
        with open(file_location, "r", encoding="utf-8") as big_file, tqdm.tqdm(
            total=os.path.getsize(file_location), desc="Splitting"
        ) as progress_bar:
            for lineno, line in enumerate(big_file):
                if lineno % lines_per_file == 0:
                    # Start of a new chunk: finish the previous file first.
                    if smallfile:
                        smallfile.close()
                    small_filename = f"small_csv_file_{lineno + lines_per_file}.csv"
                    small_filepath = os.path.join(split_dir, small_filename)
                    # "w" rather than "a": a re-run must not append a second
                    # header and duplicate rows to an existing chunk.
                    smallfile = open(small_filepath, "w", encoding="utf-8")
                    smallfile.write(file_header)
                smallfile.write(line)
                # The bar's total is the file size in bytes, so advance by
                # the encoded length, not the number of characters.
                progress_bar.update(len(line.encode("utf-8")))
    finally:
        # Close the last chunk even if reading or writing failed.
        if smallfile:
            smallfile.close()