Skip to content

UTILS API Documentation

utils.py

General-purpose utility functions for file management, numerical operations, and geospatial transformations.

Note

This module is intended for internal use within the library, but may be imported elsewhere as needed.

Author: Travis Alongi (talongi@usgs.gov)

convert_dd_catalog_to_csv(input_txt, output_csv, skip_lines=97, header_names_list=None)

Convert a Northern California DD catalog .txt file to a CSV.

Parameters:

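input_txt : str — Path to the downloaded DD catalog file.
output_csv : str — Path to the output CSV file.
skip_lines : int — Number of leading lines of the input file to skip (default 97).
header_names_list : list, optional — Column names for the CSV header row; a built-in default list is used when None.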

Source code in utils.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def convert_dd_catalog_to_csv(
    input_txt: str, output_csv: str, skip_lines: int = 97, header_names_list=None
):
    """
    Convert a Northern California DD catalog .txt file to a CSV.

    The first ``skip_lines`` lines of the input are dropped. Every remaining
    line is split on whitespace by ``awk`` and its fields re-joined with
    commas. A header row is written before the data. The ``awk`` and ``wc``
    command-line tools must be available.

    Parameters:
    ----------
    input_txt : str
        Path to the downloaded DD catalog file.
    output_csv : str
        Path to output CSV file.
    skip_lines : int, optional
        Number of leading lines of ``input_txt`` to skip. Defaults to 97.
    header_names_list : list of str, optional
        Column names for the header row. When None, the default 15-column
        list defined below is used.

    Raises:
    ------
    subprocess.CalledProcessError
        If ``wc`` or ``awk`` exits with a non-zero status.
    """
    if header_names_list is None:
        header_names = [
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "second",
            "lat",
            "lon",
            "depth_km",
            "eh1_km",
            "eh2_km",
            "azimuth_deg",
            "ez_km",
            "mag",
            "event_id",
        ]
    else:
        header_names = header_names_list
    header_line = ",".join(header_names)

    skip = int(skip_lines)

    # Awk is just easier for this than python.
    # The program text is a constant; the skip count is handed over with -v so
    # nothing caller-supplied is ever spliced into awk or shell source.
    awk_program = (
        "NR > skip { "
        "for (i = 1; i <= NF; i++) { "
        'printf "%s%s", $i, (i==NF ? "\\n" : ",") '
        "} "
        "}"
    )
    # Argument lists (no shell) keep paths with spaces or shell
    # metacharacters intact.
    awk_argv = ["awk", "-v", f"skip={skip}", awk_program, input_txt]

    # Get the total number of lines in the input file to use with tqdm.
    # With a file operand, wc prints "<count> <name>", so keep the first field.
    wc_output = subprocess.check_output(["wc", "-l", "--", input_txt])
    total_lines = int(wc_output.split()[0])

    # Open output file, write the header, then run awk with tqdm progress bar
    with open(output_csv, "w") as out:
        out.write(header_line + "\n")  # Write header row
        with subprocess.Popen(awk_argv, stdout=subprocess.PIPE) as proc:
            # The bar covers only the lines awk actually emits, i.e. those
            # after the skipped header region (never negative).
            remaining = max(total_lines - skip, 0)
            with tqdm(total=remaining, desc="Processing file") as pbar:
                for line in proc.stdout:
                    out.write(line.decode("utf-8"))  # Write output to CSV
                    pbar.update(1)

    # Popen's context manager has waited for awk by now; do not report
    # success if awk itself failed.
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, awk_argv)

    print(f"CSV with headers written to {output_csv}")

convert_latlon_to_utm(lat, lon, return_crs=False)

Converts latitude and longitude arrays to UTM coordinates.

This function transforms latitude and longitude values to UTM (Universal Transverse Mercator) coordinates using the appropriate UTM projection.

Parameters:

Name Type Description Default
lat array-like

Latitude values (in degrees).

required
lon array-like

Longitude values (in degrees).

required

Returns:

Name Type Description
tuple

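A tuple of UTM X and Y coordinates. When return_crs is True, the return value is instead a pair consisting of that coordinate tuple and the UTM CRS that was used.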

Example
import numpy as np
latitudes = np.array([37.0, 37.1])
longitudes = np.array([-121.9, -121.8])
utm_x, utm_y = convert_latlon_to_utm(latitudes, longitudes)
print(utm_x, utm_y)
Source code in utils.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def convert_latlon_to_utm(lat, lon, return_crs=False):
    """
    Project latitude/longitude values into UTM coordinates.

    The bounding box of the inputs (their min/max) is used to look up
    candidate WGS 84 UTM coordinate reference systems, and the first
    candidate returned by the lookup is the one used for the projection.

    Args:
        lat (array-like): Latitude values (in degrees). Must provide
            ``.min()`` and ``.max()`` (e.g. a NumPy array).
        lon (array-like): Longitude values (in degrees). Must provide
            ``.min()`` and ``.max()`` (e.g. a NumPy array).
        return_crs (bool, optional): When exactly ``True``, also return the
            UTM CRS that was selected. Defaults to False.

    Returns:
        tuple: The transformed ``(x, y)`` coordinates. When ``return_crs`` is
        ``True`` the result is ``((x, y), utm_crs)`` instead.

    Example:
        ```python
        import numpy as np
        latitudes = np.array([37.0, 37.1])
        longitudes = np.array([-121.9, -121.8])
        utm_x, utm_y = convert_latlon_to_utm(latitudes, longitudes)
        print(utm_x, utm_y)
        ```
    """
    # Bounding box of the input points, used to pick the UTM zone.
    bounds = AreaOfInterest(
        south_lat_degree=lat.min(),
        north_lat_degree=lat.max(),
        east_lon_degree=lon.max(),
        west_lon_degree=lon.min(),
    )
    candidates = query_utm_crs_info(datum_name="WGS 84", area_of_interest=bounds)
    utm_crs = CRS.from_epsg(candidates[0].code)

    # always_xy=True: inputs are passed as (lon, lat), outputs come back as (x, y).
    transformer = Transformer.from_proj("epsg:4326", utm_crs, always_xy=True)
    utm_xy = transformer.transform(lon, lat)

    if return_crs is True:
        return utm_xy, utm_crs
    return utm_xy

decompress_gz_file(gz_path, dest_path=None, overwrite=False)

Decompress a .gz file to the destination path.

Source code in utils.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def decompress_gz_file(
    gz_path: str, dest_path: str | None = None, overwrite: bool = False
) -> str:
    """Decompress a .gz file to the destination path."""
    if dest_path is None:
        dest_path = gz_path[:-3]  # Remove .gz

    if os.path.exists(dest_path) and not overwrite:
        print(f"{dest_path} already exists. Skipping decompression.")
        return dest_path

    print(f"Decompressing {gz_path}...")

    with gzip.open(gz_path, "rb") as f_in, open(dest_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)

    print(f"Decompressed to {dest_path}")
    return dest_path

download_file(url, dest_path)

Download a file from a URL if it doesn't already exist.

Parameters:

Name Type Description Default
url str

The URL to download from.

required
dest_path str

Full path to save the file (including filename).

required

Returns:

Type Description
str or None

Path to the downloaded file, or None if failed.

Source code in utils.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def download_file(url: str, dest_path: str) -> str:
    """
    Download a file from a URL if it doesn't already exist.

    Args:
        url: The URL to download from.
        dest_path: Full path to save the file (including filename).

    Returns:
        Path to the downloaded file, or None if failed.
    """
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)

    if os.path.exists(dest_path):
        print(f"{os.path.basename(dest_path)} already exists. Skipping download.")
        return dest_path

    print(f"Downloading {os.path.basename(dest_path)}...")

    try:
        response = requests.get(url, stream=True, timeout=10,verify=False)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {os.path.basename(dest_path)}: {e}")
        return None

    total_size = int(response.headers.get("content-length", 0))
    block_size = 1024

    with (
        open(dest_path, "wb") as file,
        tqdm(
            total=total_size,
            unit="B",
            unit_scale=True,
            desc=os.path.basename(dest_path),
        ) as bar,
    ):
        for data in response.iter_content(block_size):
            file.write(data)
            bar.update(len(data))

    print(f"Downloaded to {dest_path}")
    return dest_path

midpoint(array)

Calculates midpoints between adjacent items in an array.

This function returns the midpoints of adjacent values in the input array.

Parameters:

Name Type Description Default
array ndarray

A 1D array of numerical values (floats or ints).

required

Returns:

Name Type Description
array

A 1D array of midpoints between adjacent values.

Example
import numpy as np
values = np.array([0, 10, 20, 30])
midpoints = midpoint(values)
print(midpoints)  # Output: [5. 15. 25.]
Source code in utils.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def midpoint(array):
    """
    Calculates midpoints between adjacent items in an array.

    This function returns the midpoints of adjacent values in the input array.
    An input with fewer than two items yields an empty result.

    Args:
        array (numpy.ndarray): A 1D array of numerical values (floats or ints).

    Returns:
        array: A 1D array of midpoints between adjacent values.

    Example:
        ```python
        import numpy as np
        values = np.array([0, 10, 20, 30])
        midpoints = midpoint(values)
        print(midpoints)  # Output: [5. 15. 25.]
        ```
    """
    # Halve each neighbour *before* adding. True division promotes integer
    # inputs to float, so the sum cannot wrap around for narrow integer dtypes
    # (e.g. uint8 200 + 250 would otherwise overflow to 194).
    return array[1:] / 2 + array[:-1] / 2

remove_temp_files(directory='./input', pattern='*temp*')

Removes temporary files

Source code in utils.py
249
250
251
252
253
254
255
256
257
def remove_temp_files(directory: str = "./input", pattern: str = "*temp*"):
    """Delete every path in ``directory`` whose name matches the glob ``pattern``.

    Each deletion is reported on stdout. A path that cannot be removed is
    reported as a failure and the remaining matches are still processed.
    """
    search_glob = os.path.join(directory, pattern)
    for match in glob.glob(search_glob):
        try:
            os.remove(match)
            print(f"Removed: {match}")
        except Exception as e:
            # Best-effort cleanup: report the problem and keep going.
            print(f"Failed to remove {match}: {e}")

setup_output_directory(output_dir, delete_existing=False)

Ensure output directory exists and optionally clear existing files.

Parameters:

Name Type Description Default
output_dir str or Path-like

Directory where data will be stored.

required
delete_existing bool

If True, delete all files in the output directory. Defaults to False.

False
Example

setup_output_directory("results/")  # creates the dir if needed, doesn't delete
setup_output_directory("results/", delete_existing=True)  # deletes files first

Source code in utils.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def setup_output_directory(output_dir, delete_existing=False):
    """
    Create the output directory if needed and optionally empty it of files.

    Missing parent directories are created as well. When clearing, only
    regular files directly inside the directory are removed; subdirectories
    and their contents are left in place.

    Args:
        output_dir (str or Path-like): Directory where data will be stored.
        delete_existing (bool, optional): If True, delete all files in the output directory.
                                          Defaults to False.

    Example:
        setup_output_directory("results/")  # creates the dir if needed, doesn't delete
        setup_output_directory("results/", delete_existing=True)  # deletes files first
    """
    target = Path(output_dir)
    target.mkdir(parents=True, exist_ok=True)

    if not delete_existing:
        return

    for entry in target.iterdir():
        if not entry.is_file():
            continue
        entry.unlink()