Files
three_60/backend/etl.py
2026-04-01 12:40:40 -04:00

247 lines
7.5 KiB
Python

from database import connect_to_db
import random
from datetime import datetime, timezone, timedelta
from simulator.sim_config import ALARM_TEXTS, TICKET_TEXTS, ASSIGNEES, SITE_IDS
from config import settings
def main() -> None:
    """Run every enabled table-building step, one after another.

    The cell-site import and the change-request table are currently switched
    off; they stay listed (commented out) so they are easy to re-enable.
    """
    enabled_steps = (
        # create_cellsites_table,
        create_alarms_table,
        create_incidents_table,
        # create_change_table,
        create_robots_table,
    )
    for build_table in enabled_steps:
        build_table()
def create_alarms_table():
    """Drop and recreate the ``alarms`` table.

    Any existing ``alarms`` table is discarded together with its rows; the
    new table is left empty.
    """
    conn = connect_to_db()
    # try/finally so the connection is released even if a statement fails.
    try:
        print("Dropping any existing table")
        conn.execute("DROP TABLE IF EXISTS alarms")
        print("Creating table")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS alarms
            (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                text TEXT,
                severity INT,
                site_id INT,
                incident_id INTEGER,
                created INT,
                updated INT,
                status TEXT,
                created_by TEXT
            )
        """)
        print("Done creating table.")
        conn.commit()
    finally:
        conn.close()
def create_incidents_table():
    """Drop and recreate the ``incidents`` table, then seed historical rows.

    Seeds 500 incidents with a random text, severity (1-5), site and
    assignee. ``created`` is between 1 and roughly 731 days in the past and
    ``updated`` is 1-72 hours after ``created``, capped at the current time.
    Both are stored as integer Unix timestamps. Every seeded row has status
    ``'closed'`` and ``created_by`` set to ``'etl'``.
    """
    conn = connect_to_db()
    # try/finally so the connection is released even if a statement fails.
    try:
        print("Dropping any existing table")
        conn.execute("DROP TABLE IF EXISTS incidents")
        print("Creating table")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS incidents
            (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                text TEXT,
                severity INTEGER,
                site_id INTEGER,
                created INTEGER,
                updated INTEGER,
                assigned_to TEXT,
                status TEXT,
                created_by TEXT
            )
        """)
        conn.commit()

        print("Seeding historical incidents...")
        number_to_seed = 500
        now = datetime.now(timezone.utc)
        records = []
        for _ in range(number_to_seed):
            created_dt = now - timedelta(
                days=random.randint(1, 730),
                hours=random.randint(0, 23),
                minutes=random.randint(0, 59),
            )
            # An incident created ~1 day ago plus up to 72 hours would land in
            # the future; a closed historical incident cannot have been
            # updated later than "now", so cap it.
            updated_dt = min(created_dt + timedelta(hours=random.randint(1, 72)), now)
            records.append((
                random.choice(TICKET_TEXTS),
                random.randint(1, 5),
                random.choice(SITE_IDS),
                int(created_dt.timestamp()),
                int(updated_dt.timestamp()),
                random.choice(ASSIGNEES),
                'closed',
                'etl',
            ))

        conn.executemany("""
            INSERT INTO incidents (text, severity, site_id, created, updated, assigned_to, status, created_by)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, records)
        conn.commit()
    finally:
        conn.close()
    print(f"Done. Seeded {len(records)} historical incidents.")
# TODO: Implement change requests simulation
# def create_change_table():
# conn = connect_to_db()
# # Create table
# print("Dropping any existing table")
# conn.execute("DROP TABLE IF EXISTS changes")
# print("Creating table")
# conn.execute("""
# CREATE TABLE IF NOT EXISTS changes
# (
# id INTEGER PRIMARY KEY AUTOINCREMENT,
# text TEXT,
# severity INTEGER,
# site_id INTEGER,
# created INTEGER,
# updated INTEGER,
# assigned_to TEXT,
# status TEXT,
# created_by TEXT
# )
# """)
# print("Done creating table.")
# conn.commit()
# conn.close()
def create_robots_table():
    """Drop and recreate the ``robots`` table and insert one robot per assignee.

    Each name in ``ASSIGNEES`` becomes a robot with a random base position
    (latitude 32.0 to 42.0, longitude -91.5 to -75.5, rounded to 6 decimals).
    The robot's current ``lat``/``lon`` start at its base position and
    ``updated`` is set to the current Unix time in seconds. The incident,
    site and target columns are left NULL.
    """
    # Local import: the module has no file-level ``time`` import.
    import time

    conn = connect_to_db()
    # try/finally so the connection is released even if a statement fails.
    try:
        print("Dropping any existing table")
        conn.execute("DROP TABLE IF EXISTS robots")
        print("Creating table")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS robots (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                base_lat REAL,
                base_lon REAL,
                current_incident_id INTEGER,
                current_site_id INTEGER,
                lat REAL,
                lon REAL,
                target_lat REAL,
                target_lon REAL,
                updated INT
            )
        """)

        # Seed one robot per assignee, parked at its randomly chosen base.
        now = int(time.time())
        records = []
        for name in ASSIGNEES:
            base_lat = round(random.uniform(32.0, 42.0), 6)
            base_lon = round(random.uniform(-91.5, -75.5), 6)
            records.append((name, base_lat, base_lon, base_lat, base_lon, now))

        conn.executemany(
            "INSERT INTO robots (name, base_lat, base_lon, lat, lon, updated) VALUES (?, ?, ?, ?, ?, ?)",
            records,
        )
        conn.commit()
    finally:
        conn.close()
    print(f"Done. Inserted {len(records)} robots.")
def create_cellsites_table():
    """Drop and recreate the ``cellsites`` table and load it from the tower CSV.

    Reads ``../data/cell_towers_2026-03-21-T000000.csv`` (resolved against the
    current working directory) with ``csv.DictReader`` and inserts every data
    row. The first data row is also printed as a quick preview of the input.

    Raises:
        OSError: if the CSV file cannot be opened.
        KeyError: if a row lacks one of the expected columns.
    """
    import csv

    file_path = "../data/cell_towers_2026-03-21-T000000.csv"
    columns = (
        "radio", "mcc", "net", "area", "cell", "unit", "lon", "lat",
        "range", "samples", "changeable", "created", "updated", "averageSignal",
    )
    # Column list and placeholders are derived from ``columns`` so they cannot
    # drift apart from the tuple built for each row.
    insert_sql = (
        f"INSERT INTO cellsites ({', '.join(columns)}) "
        f"VALUES ({', '.join('?' * len(columns))});"
    )

    conn = connect_to_db()
    # try/finally so the connection is released even if reading or inserting fails.
    try:
        print("Dropping any existing table")
        conn.execute("DROP TABLE IF EXISTS cellsites")
        print("Creating table")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS cellsites
            (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                radio TEXT,
                mcc INTEGER,
                net INTEGER,
                area INTEGER,
                cell INTEGER,
                unit INTEGER,
                lon REAL,
                lat REAL,
                range INTEGER,
                samples INTEGER,
                changeable INTEGER,
                created INTEGER,
                updated INTEGER,
                averageSignal INTEGER
            )
        """)
        print("Done creating table.")

        print("Starting data insertion")
        to_db = []
        # newline="" lets the csv module handle line endings itself.
        with open(file_path, "r", newline="", encoding="utf-8") as file:
            for index, row in enumerate(csv.DictReader(file)):
                if index == 0:
                    # Preview only: the row is still appended below. Pulling it
                    # off the reader with next() would drop it from the import
                    # and raise StopIteration on an empty file.
                    print(row)
                to_db.append(tuple(row[column] for column in columns))

        cur = conn.cursor()
        cur.executemany(insert_sql, to_db)
        conn.commit()
    finally:
        conn.close()
    print("Done inserting data")
# TODO: Probably don't need this
def get_csv_file():
    """Download the full OpenCellID tower dump to ``../data/cell_towers.csv.gz``.

    The API key is read from ``settings.api_key``.

    NOTE: downloads are rate limited. A rejected request was observed to come
    back as HTTP 200 with ``Content-Type: application/json``, no
    ``Content-Length`` and this body::

        {'status': 'error', 'message': 'RATE_LIMITED', 'help': 'To ensure fair
        usage for all users, we only allow 2 downloads per file, per day.'}
    """
    import requests
    import urllib3

    # NOTE(review): certificate verification is disabled below (verify=False)
    # and the resulting warning is silenced here. The API token therefore
    # travels over an unverified TLS connection -- confirm this is still
    # required and re-enable verification if possible.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def download_database(api_key: str, output_path: str) -> None:
        """Downloads the OpenCellID database CSV.

        Prints the status code, Content-Type and Content-Length of the
        response. A JSON response (or one without Content-Length) is treated
        as an API error: it is printed and nothing is written.

        Args:
            api_key (str): OpenCellID API key
            output_path (str): path to save the downloaded file

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        url = "https://opencellid.org/ocid/downloads"
        # Passing the query via ``params`` lets requests URL-encode the token.
        params = {"token": api_key, "type": "full", "file": "cell_towers.csv.gz"}
        with requests.get(url, params=params, stream=True, timeout=120, verify=False) as response:
            content_type = response.headers.get("Content-Type")
            content_length = response.headers.get("Content-Length")
            print(response.status_code)
            print(content_type)
            print(content_length)

            # Only decode the body as JSON when the server says it is JSON.
            # Decoding unconditionally would pull the whole streamed archive
            # into memory and then fail on a successful (gzip) download.
            is_json = content_type is not None and "json" in content_type.lower()
            if is_json:
                try:
                    print(response.json())
                except ValueError:
                    # Body claimed to be JSON but is not; show it verbatim.
                    print(response.text)

            response.raise_for_status()
            if is_json or content_length is None:
                print("ERROR")
                return

            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

    # TODO: add .env support and hide the key
    API_KEY = settings.api_key
    download_database(API_KEY, "../data/cell_towers.csv.gz")
if __name__ == "__main__":
    # Script entry point: build the tables when this file is run directly.
    main()
    # The OpenCellID download is rate limited, so it is triggered by hand only:
    # get_csv_file()