"""Streamlit dashboard that flags anomalies in daily drilling reports.

Parses report PDFs with pdfplumber, classifies anomalies (OpenAI structured
output or a regex-heuristic fallback), and renders KPIs, charts, and an
optional 3D well-trajectory view with anomaly markers.
"""

import io, os, re, json, tempfile
from typing import Dict, Any, List, Tuple, Optional

import streamlit as st
import pdfplumber
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Optional dependencies; the app degrades gracefully when they are missing.
try:
    from gtts import gTTS
    GTTS_OK = True
except Exception:
    GTTS_OK = False

try:
    from audio_recorder_streamlit import audio_recorder
    HAS_REC = True
except Exception:
    HAS_REC = False

st.set_page_config(page_title="Drilling Report Anomaly Dashboard",
                   layout="wide",
                   page_icon="🛢️")


# --------------------------- Daily-report parsing ---------------------------

def extract_period_date(text: str) -> Tuple[pd.Timestamp, pd.Timestamp]:
    m = re.search(
        r"Period:\s*(\d{4}[-/]\d{2}[-/]\d{2}\s+\d{2}:\d{2})\s*-\s*(\d{4}[-/]\d{2}[-/]\d{2}\s+\d{2}:\d{2})",
        text,
    )
    if not m:
        return (pd.NaT, pd.NaT)
    try:
        return (pd.to_datetime(m.group(1)), pd.to_datetime(m.group(2)))
    except Exception:
        return (pd.NaT, pd.NaT)

def infer_date_from_filename(name: str) -> Optional[pd.Timestamp]:
    m = re.search(r"(\d{4})[-_](\d{2})[-_](\d{2})", name)
    if m:
        y, M, d = map(int, m.groups())
        try:
            return pd.Timestamp(year=y, month=M, day=d)
        except Exception:
            pass
    # Fallback: scan any run of numbers for a plausible year-month-day triple.
    nums = re.findall(r"\d{2,4}", name)
    for i in range(len(nums) - 2):
        try:
            y = int(nums[i]); M = int(nums[i + 1]); d = int(nums[i + 2])
            if 1990 <= y <= 2100 and 1 <= M <= 12 and 1 <= d <= 31:
                return pd.Timestamp(year=y, month=M, day=d)
        except Exception:
            pass
    return None

def read_pdf_text_bytes(pdf_bytes: bytes) -> str:
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        pages = [p.extract_text() or "" for p in pdf.pages]
    text = "\n".join(pages).replace("\r", "")
    text = re.sub(r"[ \t]+", " ", text)
    return text

def parse_operations_depth_time(text: str, base_date: pd.Timestamp) -> pd.DataFrame:
    rows = []
    for line in text.splitlines():
        m = re.match(r"(\d{2}:\d{2})\s+(\d{2}:\d{2})\s+(\d+)\b", line.strip())
        if not m:
            continue
        start, end, depth = m.groups()
        try:
            depth = int(depth)
            start_dt = pd.to_datetime(f"{base_date.date()} {start}")
            end_dt = pd.to_datetime(f"{base_date.date()} {end}")
            if end_dt < start_dt:  # interval wraps past midnight
                end_dt += pd.Timedelta(days=1)
            mid = start_dt + (end_dt - start_dt) / 2
            rows.append((start_dt, end_dt, mid, depth))
        except Exception:
            pass
    return pd.DataFrame(rows, columns=["start", "end", "mid_time", "depth_m"])
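
# Note: parse_operations_depth_time expects Operations rows shaped roughly like
#   "00:00 02:30 3412 Drilled 8-1/2in hole from 3380 m to 3412 m"
# (an illustrative line, not text from a real report): start time, end time,
# end depth in metres, then a free-text remark.
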
def parse_mud_density(text: str, base_date: pd.Timestamp) -> pd.DataFrame:
    m_time = re.search(r"Sample Time\s+(\d{2}:\d{2})\s+(\d{2}:\d{2})", text)
    m = re.search(r"Fluid Density\s*\(g/cm3\)\s+([\d\.\-]+)\s+([\d\.\-]+)", text)
    if not m or not m_time:
        return pd.DataFrame()
    try:
        t1, t2 = m_time.groups()
        v1, v2 = float(m.group(1)), float(m.group(2))
        ts = [pd.to_datetime(f"{base_date.date()} {t1}"),
              pd.to_datetime(f"{base_date.date()} {t2}")]
        return pd.DataFrame({"time": ts, "density_gcm3": [v1, v2]})
    except Exception:
        return pd.DataFrame()

def parse_bit_record_rop(text: str) -> pd.DataFrame:
    m_hole = re.search(r"Hole\s+Made\s*\(last\s*24H\)\s*([\d\.\-]+)", text, re.IGNORECASE)
    m_hrs = re.search(r"Hours\s+Drilled\s*\(last\s*24H\)\s*([\d\.\-]+)", text, re.IGNORECASE)
    if not m_hole or not m_hrs:
        return pd.DataFrame()
    try:
        hole = float(m_hole.group(1)); hrs = float(m_hrs.group(1))
        rop = hole / hrs if hrs and hrs > 0 else np.nan
        return pd.DataFrame([{"hole_made_m": hole, "hours_drilled": hrs, "rop_m_per_hr": rop}])
    except Exception:
        return pd.DataFrame()

def parse_equipment_downtime_minutes(text: str) -> float:
    blk = re.split(r"Equipment Failure Information", text, flags=re.IGNORECASE)
    if len(blk) < 2:
        return 0.0
    tail = blk[1]
    # Crude heuristic: sum every number in the failure block that could
    # plausibly be a minutes value (0-1440).
    mins = re.findall(r"\b(\d{1,4})\s*(?:min)?\b", tail)
    vals = []
    for x in mins:
        try:
            v = float(x)
            if 0 <= v <= 1440:
                vals.append(v)
        except Exception:
            pass
    positives = [v for v in vals if v > 0]
    return float(sum(positives) if positives else 0.0)
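

# -------------------------- Anomaly classification ---------------------------
# Two interchangeable classifiers return the same dict shape
#   {"is_anomalous": bool, "labels": [...], "rationale": str, "spans": [...]}:
# an OpenAI structured-output call (schema below) and a regex heuristic used
# whenever the API toggle is off or no key is available.
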
ANOMALY_JSON_SCHEMA = {
    "name": "AnomalyClassification",
    "strict": True,
    "schema": {
        "type": "object",
        "additionalProperties": False,
        "properties": {
            "is_anomalous": {"type": "boolean"},
            "labels": {
                "type": "array",
                "items": {"type": "string", "enum": ["losses", "stuck_pipe", "pack_off"]},
                "uniqueItems": True,
            },
            "rationale": {"type": "string"},
            "spans": {
                "type": "array",
                "items": {
                    "type": "object",
                    "additionalProperties": False,
                    "properties": {
                        "label": {"type": "string", "enum": ["losses", "stuck_pipe", "pack_off"]},
                        "text": {"type": "string"},
                    },
                    "required": ["label", "text"],
                },
            },
        },
        # Strict structured outputs expect every property to be listed here,
        # so "spans" is required too (it may still be an empty array).
        "required": ["is_anomalous", "labels", "rationale", "spans"],
    },
}
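
# Strict mode supports only a subset of JSON Schema; if a target model rejects
# a keyword such as "uniqueItems", it can be dropped without changing intent.
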
SYSTEM_PROMPT = (
    "You are a drilling anomaly detector for daily reports. "
    "Return ONLY JSON matching the schema. "
    "Taxonomy: losses (lost returns / no returns), stuck_pipe, pack_off "
    "(packed-off hole / circulation blocked). "
    "If no anomaly is present, set is_anomalous=false and labels=[]. "
    "If anomalous, include 1-2 short verbatim evidence spans."
)


def build_user_prompt(text: str) -> str:
    return "Classify anomalies among ['losses','stuck_pipe','pack_off'].\n\nREPORT TEXT:\n" + text

def classify_with_openai(text: str, model: str, api_key: str) -> Dict[str, Any]:
    try:
        from openai import OpenAI
        client = OpenAI(api_key=api_key)
        resp = client.responses.create(
            model=model,
            input=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": build_user_prompt(text)},
            ],
            # The Responses API takes structured-output config under
            # text.format (not the Chat Completions response_format kwarg).
            text={"format": {"type": "json_schema", **ANOMALY_JSON_SCHEMA}},
            temperature=0,
        )
        raw = getattr(resp, "output_text", None)
        if not raw:
            raw = resp.output[0].content[0].text
        return json.loads(raw)
    except Exception as e:
        # Fail closed: report the error but do not crash the dashboard.
        return {"is_anomalous": False, "labels": [], "rationale": f"LLM failed: {e}", "spans": []}

def heuristic_classify(text: str) -> Dict[str, Any]:
    lines = [ln.strip() for ln in text.split("\n") if ln.strip()]
    pat_losses = re.compile(r"\b(lost returns?|lost\s+circulation|no returns|lost\s+circ)\b", re.IGNORECASE)
    pat_stuck = re.compile(r"\b(stuck\s+pipe|pipe\s+stuck|string\s+stuck|differential\s+sticking)\b", re.IGNORECASE)
    pat_pack = re.compile(r"\b(pack(?:ed)?-?\s*off|packed\s+off|hole\s+packed\s+off|circulation\s+blocked)\b", re.IGNORECASE)
    labels, spans = set(), []
    for ln in lines:
        # Label each line on its own hits, so a span is not tagged with
        # labels that only matched on earlier lines.
        line_labels = set()
        if pat_losses.search(ln):
            line_labels.add("losses")
        if pat_pack.search(ln):
            line_labels.add("pack_off")
        if pat_stuck.search(ln):
            line_labels.add("stuck_pipe")
        if line_labels:
            labels |= line_labels
            spans.append({"label": "/".join(sorted(line_labels)), "text": ln})
    return {
        "is_anomalous": bool(labels),
        "labels": sorted(labels),
        "rationale": "Heuristic keyword match.",
        "spans": spans[:3],
    }
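
# Quick sanity check (assumed sample line, not from a real report):
#   heuristic_classify("02:00 04:30 3410 Stuck pipe at 3410 m, lost circulation")
# returns labels ["losses", "stuck_pipe"] with that line as the evidence span.
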
def extract_event_depths_from_spans(spans: List[Dict[str, str]]) -> List[int]:
    depths = []
    for s in spans or []:
        txt = s.get("text", "")
        # Pick up depth mentions such as "3410 m" / "850m".
        for m in re.finditer(r"\b(\d{3,4})\s?m\b", txt.lower()):
            try:
                depths.append(int(m.group(1)))
            except Exception:
                pass
    return depths
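

# ------------------- Survey parsing & 3D trajectory agent -------------------
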
def parse_survey_pdf_bytes(pdf_bytes: bytes) -> pd.DataFrame:
    rows = []
    num = r'[-+]?(?:\d{1,3}(?:,\d{3})*|\d+)(?:\.\d+)?'
    row_re = re.compile(
        rf'^\s*({num})\s+({num})\s+({num})\s+({num})\s+({num})\s+({num})\s+({num})\s+({num})\s+({num})\s*({num})\s*$'
    )
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        for page in pdf.pages:
            txt = page.extract_text() or ""
            for line in txt.splitlines():
                line = line.strip()
                # Skip blank lines and header lines that start with words.
                if not line or (line[0].isalpha() and not line.split()[0].replace('.', '', 1).isdigit()):
                    continue
                m = row_re.match(line)
                if m:
                    vals = [float(v.replace(',', '')) for v in m.groups()]
                    rows.append(vals)
    if not rows:
        raise ValueError("No survey rows found in this PDF.")
    df = pd.DataFrame(rows, columns=[
        'MD_m', 'Incl_deg', 'Azim_deg', 'E_m', 'VS_m', 'DL_deg_per30m', 'N_m', 'BR_deg_per30m', 'TR_deg_per30m', 'TVD_m'
    ])
    return df.sort_values('MD_m').reset_index(drop=True)
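

# Minimum-curvature method: between consecutive survey stations 1 and 2 the
# dogleg angle DL satisfies
#     cos(DL) = sin(I1)*sin(I2)*cos(A2 - A1) + cos(I1)*cos(I2)
# and the ratio factor is RF = (2/DL)*tan(DL/2)  (RF -> 1 as DL -> 0), giving
#     dN = (dMD/2)*(sin(I1)*cos(A1) + sin(I2)*cos(A2))*RF
# and analogous expressions for dE and dTVD. The "top lock" below also zeroes
# lateral drift over the near-vertical top-hole, where azimuth readings are
# too noisy to be meaningful at very low inclination.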
def recompute_min_curve_with_top_lock(
        df: pd.DataFrame,
        md_col="MD_m", inc_col="Incl_deg", az_col="Azim_deg",
        inc_lock_deg=2.5, lateral_lock_m=8.0, roll_window=7, max_lock_md=300.0, inc_damp_deg=3.0
) -> pd.DataFrame:
    d = df[[md_col, inc_col, az_col]].copy().sort_values(md_col).reset_index(drop=True)
    md = d[md_col].to_numpy(float)
    inc = np.deg2rad(d[inc_col].to_numpy(float))
    az = np.deg2rad(d[az_col].to_numpy(float)); az = np.unwrap(az)
    n = len(md)
    E = np.zeros(n); N = np.zeros(n); TVD = np.zeros(n)
    roll_inc = pd.Series(np.rad2deg(inc)).rolling(roll_window, min_periods=1).mean().to_numpy()
    search = (md - md[0]) <= max_lock_md
    i_lock_end = 0
    for i in range(1, n):
        if not search[i]:
            break
        if roll_inc[i] >= inc_lock_deg:
            i_lock_end = i; break
    for i in range(1, n):
        dMD = md[i] - md[i - 1]
        if dMD <= 0:
            continue
        i1, i2 = inc[i - 1], inc[i]; a1, a2 = az[i - 1], az[i]
        if np.rad2deg(i1) < inc_damp_deg and np.rad2deg(i2) < inc_damp_deg:
            a2 = a1  # damp azimuth noise while near-vertical
        cos_dl = np.clip(np.sin(i1)*np.sin(i2)*np.cos(a2 - a1) + np.cos(i1)*np.cos(i2), -1.0, 1.0)
        dl = np.arccos(cos_dl); RF = 1.0 if dl < 1e-12 else (2.0/dl)*np.tan(dl/2.0)
        nx1, ex1, vz1 = np.sin(i1)*np.cos(a1), np.sin(i1)*np.sin(a1), np.cos(i1)
        nx2, ex2, vz2 = np.sin(i2)*np.cos(a2), np.sin(i2)*np.sin(a2), np.cos(i2)
        dN = 0.5 * dMD * (nx1 + nx2) * RF
        dE = 0.5 * dMD * (ex1 + ex2) * RF
        dV = 0.5 * dMD * (vz1 + vz2) * RF
        if i <= max(i_lock_end, 0) and search[i]:
            dN, dE = 0.0, 0.0
        N[i] = N[i-1] + dN; E[i] = E[i-1] + dE; TVD[i] = TVD[i-1] + dV
        if i <= i_lock_end and np.hypot(E[i], N[i]) > lateral_lock_m:
            i_lock_end = i
    out = df.copy()
    out["E_m"] = E; out["N_m"] = N
    out["TVD_m"] = (TVD + float(out["TVD_m"].iloc[0])) if "TVD_m" in out else TVD
    return out

_ROW_START = re.compile(r'^\s*(\d{1,2}:\d{2})\s+(\d{1,2}:\d{2})\s+(\d{3,5}(?:\.\d+)?)\b')
RE_LOSSES = re.compile(r'\b(loss|losses|lost\s+(?:returns|mud)|no\s+returns|lost\s+circulation)\b', re.I)
RE_STUCK = re.compile(r'\b(stuck\s+pipe|stuck\b|differential\s+stuck|free\s+pipe|worked\s+pipe|overpull)\b', re.I)
RE_PACKOFF = re.compile(r'\b(pack[- ]?off|packed\s*off|tight\s+hole)\b', re.I)


def _clean_state_prefix(text: str) -> str:
    return re.sub(r'^\s*[A-Za-z/ &-]+--\s*[A-Za-z0-9/ &-]+\s*', '', text).strip()
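
# Note: _clean_state_prefix is a best-effort strip of a leading
# "Phase -- State" prefix ahead of the remark; its character classes are
# broad, so remarks made up entirely of letters/digits/spaces/dashes can be
# over-stripped. Treat its output as cosmetic, not authoritative.
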
def parse_operations_anomalies_bytes(pdf_bytes: bytes) -> Tuple[Dict[str, List[Dict]], int]:
    groups = {"lost_circulation": [], "stuck_pipe": [], "pack_off": [], "other": []}
    ops_row_count = 0
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        for page in pdf.pages:
            text = page.extract_text() or ""
            lines = [ln.rstrip() for ln in text.splitlines()]
            # Collect the lines between the "Operations" header and the next section.
            ops_started, block = False, []
            for ln in lines:
                low = ln.lower()
                if not ops_started and "operations" in low:
                    ops_started = True; continue
                if ops_started and ("drilling fluid" in low or "pore pressure" in low or "gas reading" in low or ("casing" in low and "liner" in low)):
                    break
                if ops_started:
                    block.append(ln)
            if not block:
                continue
            # Stitch wrapped remark lines back onto the row they belong to.
            stitched_rows, current = [], ""
            for ln in block:
                m = _ROW_START.match(ln)
                if m:
                    if current:
                        stitched_rows.append(current.strip())
                    current = ln
                elif current:
                    current += " " + ln.strip()
            if current:
                stitched_rows.append(current.strip())

            for row in stitched_rows:
                m = _ROW_START.match(row)
                if not m:
                    continue
                ops_row_count += 1
                start_t, end_t, end_depth = m.group(1), m.group(2), float(m.group(3))
                rest = _clean_state_prefix(row[m.end():].strip())
                remark, times = rest, f"{start_t}-{end_t}"
                entry = {"md": end_depth, "remark": remark, "times": times}
                matched = False
                if RE_LOSSES.search(remark):
                    groups["lost_circulation"].append(entry); matched = True
                if RE_STUCK.search(remark):
                    groups["stuck_pipe"].append(entry); matched = True
                if RE_PACKOFF.search(remark):
                    groups["pack_off"].append(entry); matched = True
                if not matched and re.search(r'\b(kick|influx|trip\s*gas|high\s+gas|h2s)\b', remark, re.I):
                    groups["other"].append(entry)

    # Merge entries that share a (rounded) measured depth: one marker per spot.
    for k, lst in groups.items():
        by_md: Dict[float, Dict] = {}
        for e in lst:
            key = round(e["md"], 1)
            if key not in by_md:
                by_md[key] = {"md": e["md"], "remark": e["remark"], "times": e["times"]}
            else:
                if e["remark"] not in by_md[key]["remark"]:
                    by_md[key]["remark"] += " | " + e["remark"]
                if e["times"] not in by_md[key]["times"]:
                    by_md[key]["times"] += "," + e["times"]
        groups[k] = list(by_md.values())
    return groups, ops_row_count

def _suggest_camera_params(x, y, z, base_radius=1.6, base_z=0.9, zoom_factor=0.5):
    span_xy = max((x.max() - x.min()), (y.max() - y.min()))
    span_z = (z.max() - z.min()); span = max(span_xy, span_z)
    radius = (base_radius + (span / 2000.0)) * zoom_factor
    z_eye = (base_z + (span / 3000.0)) * (zoom_factor ** 0.5)
    return radius, z_eye

def make_3d_figure(df_xyz: pd.DataFrame, title="3D Well Trajectory"):
    x = df_xyz["E_m"].to_numpy(); y = df_xyz["N_m"].to_numpy(); z = -df_xyz["TVD_m"].to_numpy()
    md = df_xyz["MD_m"].to_numpy()
    radius0, z_eye0 = _suggest_camera_params(x, y, z, zoom_factor=0.5)
    camera_init = dict(eye=dict(x=radius0, y=radius0, z=z_eye0))
    fig = go.Figure(go.Scatter3d(
        x=x, y=y, z=z, mode="lines",
        line=dict(width=6, color=md, colorscale="Viridis"),
        hovertemplate=("MD: %{customdata[0]:.2f} m<br>"
                       "E: %{x:.2f} m | N: %{y:.2f} m<br>"
                       "TVD: %{customdata[1]:.2f} m<extra></extra>"),
        customdata=np.column_stack([md, df_xyz["TVD_m"].to_numpy()]),
        name="Well trajectory", legendrank=1, showlegend=True
    ))
    fig.update_layout(
        title=title, margin=dict(l=0, r=0, t=40, b=0), showlegend=True,
        scene=dict(
            camera=camera_init, aspectmode="data",
            xaxis=dict(title="Easting (m)", backgroundcolor="white", showgrid=True, gridcolor="lightgrey", zeroline=False),
            yaxis=dict(title="Northing (m)", backgroundcolor="white", showgrid=True, gridcolor="lightgrey", zeroline=False),
            zaxis=dict(title="Depth (m, TVD)", backgroundcolor="white", showgrid=True, gridcolor="lightgrey", zeroline=False),
        )
    )
    return fig

def add_camera_rotation_animation(fig: go.Figure, x, y, z, revolutions=1.0, n_frames=120, zoom_factor=0.5):
    radius, z_eye = _suggest_camera_params(x, y, z, zoom_factor=zoom_factor)
    angles = np.linspace(0, 2 * np.pi * revolutions, n_frames)
    frames = [go.Frame(name=f"cam{a:.3f}",
                       layout=dict(scene_camera=dict(eye=dict(x=radius*np.cos(a), y=radius*np.sin(a), z=z_eye))))
              for a in angles]
    fig.update(frames=frames)
    updatemenus = list(fig.layout.updatemenus) if fig.layout.updatemenus else []
    updatemenus.append(dict(
        type="buttons", direction="left", x=0.50, y=1.08, xanchor="center", yanchor="top", showactive=False,
        buttons=[
            dict(label="▶ Play", method="animate",
                 args=[None, dict(frame=dict(duration=60, redraw=True), transition=dict(duration=0),
                                  fromcurrent=True)]),
            dict(label="⏸ Pause", method="animate",
                 args=[[None], dict(frame=dict(duration=0, redraw=False), transition=dict(duration=0),
                                    mode="immediate")])
        ]
    ))
    fig.update_layout(updatemenus=updatemenus)
    return fig

def _map_md_to_xyz(df_xyz: pd.DataFrame, md_values: List[float]) -> Tuple[List[float], List[float], List[float]]:
    # Snap each anomaly MD to the nearest survey station.
    xs, ys, zs = [], [], []
    arr_md = df_xyz["MD_m"].to_numpy()
    for md in md_values:
        i = int(np.argmin(np.abs(arr_md - md)))
        xs.append(float(df_xyz["E_m"].iloc[i])); ys.append(float(df_xyz["N_m"].iloc[i])); zs.append(float(-df_xyz["TVD_m"].iloc[i]))
    return xs, ys, zs

def add_anomaly_category_traces(fig: go.Figure, df_xyz: pd.DataFrame, grouped: dict):
    category_style = {
        "lost_circulation": {"name": "Lost circulation", "symbol": "diamond", "rank": 100},
        "stuck_pipe": {"name": "Stuck pipe", "symbol": "x", "rank": 101},
        "pack_off": {"name": "Pack-off", "symbol": "square", "rank": 102},
        "other": {"name": "Other (gas/kick)", "symbol": "circle-open", "rank": 103},
    }
    ordered_keys = ["lost_circulation", "stuck_pipe", "pack_off", "other"]
    trace_indices = []
    for key in ordered_keys:
        items = grouped.get(key, [])
        style = category_style[key]
        if items:
            md_vals = [e["md"] for e in items]
            ax, ay, az = _map_md_to_xyz(df_xyz, md_vals)
            labels = [f"{style['name']} @ {m:.0f} mMD" for m in md_vals]
            hover = []
            for e in items:
                snippet = (e["remark"][:120] + "…") if len(e["remark"]) > 120 else e["remark"]
                hover.append(f"{style['name']}<br>End depth: {e['md']:.0f} mMD<br>Time: {e['times']}<br>Remark: {snippet}")
            fig.add_trace(go.Scatter3d(
                x=ax, y=ay, z=az, mode="markers+text",
                marker=dict(size=6, color="red", symbol=style["symbol"]),
                text=labels, textposition="top center",
                hovertext=hover, hoverinfo="text",
                name=style["name"], legendrank=style["rank"], visible=True, showlegend=True,
            ))
        else:
            # Invisible placeholder keeps a stable legend entry for the category.
            fig.add_trace(go.Scatter3d(
                x=[np.nan], y=[np.nan], z=[np.nan], mode="markers",
                marker=dict(size=6, color="red", symbol=style["symbol"], opacity=0),
                name=style["name"], legendrank=style["rank"], hoverinfo="skip", visible=True, showlegend=True,
            ))
        trace_indices.append(len(fig.data) - 1)

    # The toggle buttons flip only the anomaly traces, not the trajectory.
    visible_all_on = [True] * len(fig.data)
    visible_all_off = [True] * len(fig.data)
    for i in trace_indices:
        visible_all_off[i] = False
    updatemenus = list(fig.layout.updatemenus) if fig.layout.updatemenus else []
    updatemenus.append(dict(
        type="buttons", direction="left", x=0.50, y=1.16, xanchor="center", yanchor="top", showactive=False,
        buttons=[
            dict(label="Anomalies: ON", method="update", args=[{"visible": visible_all_on}]),
            dict(label="Anomalies: OFF", method="update", args=[{"visible": visible_all_off}]),
        ],
    ))
    fig.update_layout(updatemenus=updatemenus)
    return fig


# ------------------ Streamlit UI: sidebar, state, processing ------------------

with st.sidebar:
    st.header("Upload & Settings")

    files = st.file_uploader("Upload daily report PDFs", type=["pdf"], accept_multiple_files=True)
    st.caption("Add more files anytime; dashboard updates live.")

    st.subheader("3D Trajectory Agent (optional)")
    traj_pdf = st.file_uploader("Trajectory / Survey PDF", type=["pdf"], key="traj")
    anomaly_pdf = st.file_uploader("Daily Report PDF (for 3D anomalies)", type=["pdf"], key="rep3d")
    run_3d = st.button("Run 3D Agent")

    st.divider()
    st.subheader("Classifier")
    # Read the key from the environment only; never hard-code secrets in source.
    DEFAULT_OPENAI_KEY = os.getenv("OPENAI_API_KEY", "")
    use_openai = st.toggle("Use OpenAI Responses API", value=False)
    model_name = st.text_input("Model name", value="gpt-4o-mini-2024-07-18")

    api_key_prefill = "set via Space secret" if DEFAULT_OPENAI_KEY else ""
    api_key = st.text_input("OpenAI API Key", type="password", value=api_key_prefill)
    if api_key == "set via Space secret":
        api_key = DEFAULT_OPENAI_KEY
    st.markdown("If OFF or key missing, a heuristic will be used.")

    process_btn = st.button("Process files")

if "reports" not in st.session_state: st.session_state.reports = [] |
|
if "chat" not in st.session_state: st.session_state.chat = [] |
|
if "traj_fig" not in st.session_state: st.session_state.traj_fig = None |
|
if "traj_summary" not in st.session_state: st.session_state.traj_summary = "" |
|
|
|
|
|
|
|
|
|
if process_btn and files:
    new_items = []
    for f in files:
        try:
            name = f.name
            data = f.getvalue()
            text = read_pdf_text_bytes(data)

            s, e = extract_period_date(text)
            inferred = infer_date_from_filename(name)
            base_date = s if pd.notna(s) else (inferred if inferred else pd.Timestamp.today().normalize())

            ops_df = parse_operations_depth_time(text, base_date)
            mud_df = parse_mud_density(text, base_date)
            rop_df = parse_bit_record_rop(text)
            downtime_min = parse_equipment_downtime_minutes(text)

            if use_openai and api_key.strip() and model_name.strip():
                cls = classify_with_openai(text, model_name, api_key)
            else:
                cls = heuristic_classify(text)

            evt_depths = extract_event_depths_from_spans(cls.get("spans"))

            rec = {
                "name": name,
                "period_start": s if pd.notna(s) else (inferred if inferred else base_date),
                "period_end": e if pd.notna(e) else None,
                "ops_df": ops_df.to_dict("records"),
                "mud_df": mud_df.to_dict("records"),
                "rop_df": rop_df.to_dict("records"),
                "downtime_min": float(downtime_min),
                "classification": cls,
                "event_depths": evt_depths,
                "raw_bytes": data,
            }
            new_items.append(rec)
        except Exception as ex:
            st.error(f"Failed to process {f.name}: {ex}")

    # Re-uploading a file with the same name replaces the earlier record.
    existing = {r["name"]: r for r in st.session_state.reports}
    for r in new_items:
        existing[r["name"]] = r
    st.session_state.reports = list(existing.values())
    st.success(f"Processed {len(new_items)} file(s).")

def run_trajectory_agent(survey_bytes: bytes, report_bytes: bytes) -> Tuple[go.Figure, str]:
    """Deterministic 'agent' that:

    1) parses the survey and recomputes the path (minimum curvature),
    2) parses Operations anomalies (end depth, remark, time),
    3) renders the 3D figure and pins category markers.

    Returns (figure, short summary).
    """
    survey_df = parse_survey_pdf_bytes(survey_bytes)
    df_mc = recompute_min_curve_with_top_lock(survey_df)
    x = df_mc["E_m"].to_numpy(); y = df_mc["N_m"].to_numpy(); z = -df_mc["TVD_m"].to_numpy()
    fig = make_3d_figure(df_mc)
    fig = add_camera_rotation_animation(fig, x, y, z, revolutions=1.0, n_frames=120, zoom_factor=0.5)

    groups, ops_rows = parse_operations_anomalies_bytes(report_bytes)
    counts = {k: len(v) for k, v in groups.items()}
    fig = add_anomaly_category_traces(fig, df_mc, groups)

    summary = (f"Survey rows: {len(survey_df)} | Ops rows parsed: {ops_rows} | "
               f"Anomalies: losses {counts.get('lost_circulation', 0)}, "
               f"pack_off {counts.get('pack_off', 0)}, "
               f"stuck_pipe {counts.get('stuck_pipe', 0)}, "
               f"other {counts.get('other', 0)}.")
    return fig, summary

if run_3d:
    if not traj_pdf or not anomaly_pdf:
        st.sidebar.error("Please upload BOTH a trajectory (survey) PDF and a daily report PDF.")
    else:
        try:
            fig3d, summary = run_trajectory_agent(traj_pdf.getvalue(), anomaly_pdf.getvalue())
            st.session_state.traj_fig = fig3d
            st.session_state.traj_summary = summary
            st.sidebar.success("3D Agent completed.")
        except Exception as e:
            st.sidebar.error(f"3D Agent failed: {e}")

st.title("🛢️ Drilling Report Anomaly Dashboard")

reports = st.session_state.reports
if not reports:
    st.info("Upload daily report PDFs in the sidebar to begin.")
    st.stop()
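

# ------------------------------- Dashboard body -------------------------------
# to_df emits one row per (report, label); reports with no anomaly contribute
# a "none" row so they still appear in the overview chart and table.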
def to_df(reports: List[Dict[str, Any]]) -> pd.DataFrame:
    rows = []
    for r in reports:
        start = r["period_start"]
        if isinstance(start, str) and start:
            start = pd.to_datetime(start)
        cls = r["classification"]; labels = cls.get("labels", []) or []
        if not labels:
            labels = ["none"]
        for lab in labels:
            rows.append({
                "name": r["name"],
                "date": start.normalize() if isinstance(start, pd.Timestamp) and pd.notna(start) else pd.NaT,
                "label": lab, "is_anomalous": (lab != "none"),
                "downtime_min": r.get("downtime_min", 0.0),
            })
    df = pd.DataFrame(rows)
    if not df.empty and "date" in df:
        # Backfill missing dates from the filename where possible.
        mask = df["date"].isna()
        if mask.any():
            inferred_dates = []
            for nm in df.loc[mask, "name"]:
                d = infer_date_from_filename(nm)
                inferred_dates.append(d if d else pd.NaT)
            df.loc[mask, "date"] = inferred_dates
        df.sort_values(["date", "name"], inplace=True, na_position="last")
    return df


df_all = to_df(reports)

left, right = st.columns([3, 2], gap="large")

with left:
    st.subheader("Global Overview")

    if df_all["date"].notna().any():
        unique_dates = sorted(set(pd.to_datetime(df_all["date"].dropna()).dt.date.tolist()))
        if len(unique_dates) >= 2:
            min_date, max_date = unique_dates[0], unique_dates[-1]
            date_range = st.slider("Date range", min_value=min_date, max_value=max_date, value=(min_date, max_date))
            df_filt = df_all[(df_all["date"] >= pd.to_datetime(date_range[0])) &
                             (df_all["date"] <= pd.to_datetime(date_range[1]))]
        else:
            st.info(f"Single date found: {unique_dates[0]}. Showing that day.")
            df_filt = df_all.copy()
    else:
        st.warning("Dates not found in reports or filenames; showing all.")
        df_filt = df_all.copy()

    if not df_filt.empty and df_filt["date"].notna().any():
        fig = px.histogram(df_filt, x="date", color="label", barmode="stack", title="Anomaly Distribution Over Time")
        st.plotly_chart(fig, use_container_width=True)
    else:
        fig = px.histogram(df_filt, x="label", color="label", title="Anomaly Distribution (no dates)")
        st.plotly_chart(fig, use_container_width=True)

    st.dataframe(df_filt, use_container_width=True)

    st.divider()
    st.subheader("3D Trajectory (Agent)")
    if st.session_state.traj_fig is not None:
        st.caption(st.session_state.traj_summary or "")
        st.plotly_chart(st.session_state.traj_fig, use_container_width=True)
    else:
        st.info("Upload a **Trajectory PDF** and a **Daily Report PDF** in the sidebar, then click **Run 3D Agent** to see the 3D view here.")

with right:
    st.subheader("KPIs")
    total_reports = df_all["name"].nunique()
    total_anom = int(df_all["is_anomalous"].sum())
    last_date = df_all["date"].dropna().max() if df_all["date"].notna().any() else None

    k1, k2, k3 = st.columns(3)
    k1.metric("Reports", total_reports)
    k2.metric("Anomalies", total_anom)
    k3.metric("Latest date", "-" if last_date is None or pd.isna(last_date) else str(last_date.date()))

    names = sorted({r["name"] for r in reports})
    sel = st.selectbox("Select report", names, index=max(0, len(names) - 1))
    rep = next(r for r in reports if r["name"] == sel)

    cls = rep["classification"]
    is_anom = cls.get("is_anomalous", False)
    label_list = cls.get("labels", []) or []
    labels_str = ", ".join(label_list) if label_list else "none"

    if is_anom:
        st.error(f"⚠️ Anomaly detected: {labels_str}")
        if GTTS_OK and st.button("🔊 Speak alert"):
            tts = gTTS(text=f"Attention. Anomaly detected. {labels_str.replace('_', ' ')}.", lang='en')
            tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False); tts.save(tmp.name)
            st.audio(tmp.name, format="audio/mp3")
    else:
        st.success("✅ All clear: no anomaly detected.")
        if GTTS_OK and st.button("🔊 Speak summary"):
            tts = gTTS(text="All clear. No anomaly detected. Operations normal.", lang='en')
            tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False); tts.save(tmp.name)
            st.audio(tmp.name, format="audio/mp3")

    c1, c2 = st.columns(2)
    c1.metric("Anomalous?", "Yes" if is_anom else "No")
    c2.metric("Label(s)", labels_str)

    st.divider()
    st.caption("Report detail")
    ops_df = pd.DataFrame(rep["ops_df"])
    mud_df = pd.DataFrame(rep["mud_df"])
    rop_df = pd.DataFrame(rep["rop_df"])

    if not ops_df.empty:
        fig = px.line(ops_df, x="mid_time", y="depth_m", title="Depth vs Time (Operations)")
        if is_anom:
            fig.update_traces(line=dict(color="#d62728"))
        fig.update_yaxes(autorange="reversed", title="Depth (mMD)")
        fig.update_xaxes(title="Time")
        st.plotly_chart(fig, use_container_width=True)
    if not mud_df.empty:
        fig = px.line(mud_df, x="time", y="density_gcm3", markers=True, title="Mud Density vs Time (g/cm³)")
        if is_anom:
            fig.update_traces(line=dict(color="#d62728"), marker=dict(color="#d62728"))
        st.plotly_chart(fig, use_container_width=True)
    if not rop_df.empty and not pd.isna(rop_df.get("rop_m_per_hr", [np.nan])[0]):
        rop = float(rop_df["rop_m_per_hr"].iloc[0])
        fig = go.Figure(go.Indicator(mode="number+gauge", value=rop, number={'valueformat': '.2f'},
                                     gauge={'shape': "bullet"}, title={'text': "ROP (m/hr), last 24h"}))
        fig.update_layout(height=140, margin=dict(l=30, r=30, t=30, b=10))
        st.plotly_chart(fig, use_container_width=True)

    spans = cls.get("spans", [])
    if spans:
        with st.expander("Evidence spans"):
            for s in spans:
                st.write(f"- **{s.get('label', '')}**: {s.get('text', '')}")

st.divider()
st.subheader("Chat")

with st.expander("🎙️ Voice question"):
    recorded = None
    if HAS_REC:
        st.caption("Click to start/stop recording, then press **Transcribe & Ask**.")
        recorded = audio_recorder(pause_threshold=3.0)
    voice_file = st.file_uploader("…or upload a short .wav/.mp3", type=["wav", "mp3"], key="voice_up")
    if st.button("Transcribe & Ask"):
        audio_bytes = None
        if recorded:
            audio_bytes = recorded
        elif voice_file:
            audio_bytes = voice_file.getvalue()
        if not audio_bytes:
            st.warning("No audio captured or uploaded.")
        elif api_key.strip():
            try:
                from openai import OpenAI
                client = OpenAI(api_key=api_key)

                # Recorded audio is WAV; keep an uploaded file's own extension
                # so the transcription endpoint sees the right format.
                suffix = ".wav" if recorded else os.path.splitext(voice_file.name)[1] or ".mp3"
                tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
                tmp.write(audio_bytes); tmp.flush()
                with open(tmp.name, "rb") as fh:
                    tr = client.audio.transcriptions.create(model="whisper-1", file=fh)
                voice_text = tr.text if hasattr(tr, "text") else str(tr)
                st.write("You said:", voice_text)
                st.session_state.chat.append({"role": "user", "content": voice_text})
            except Exception as e:
                st.error(f"Transcription failed: {e}")
        else:
            st.warning("Add your OpenAI API key in the sidebar to enable voice transcription.")

for m in st.session_state.chat:
    with st.chat_message(m["role"]):
        st.markdown(m["content"])

chat_q = st.chat_input("Ask about anomalies, depths, mud density, etc.")
if chat_q:
    st.session_state.chat.append({"role": "user", "content": chat_q})

if st.session_state.chat and st.session_state.chat[-1]["role"] == "user":
    # Answer about the report currently selected in the KPI panel.
    sel_name = sel if 'sel' in locals() else reports[-1]["name"]
    rep = next(r for r in reports if r["name"] == sel_name)
    ops_df = pd.DataFrame(rep["ops_df"]); mud_df = pd.DataFrame(rep["mud_df"])
    cls = rep["classification"]; is_anom = cls.get("is_anomalous", False)
    labels = ", ".join(cls.get("labels", [])) if cls.get("labels") else "none"
    ans = f"Report **{sel_name}**: anomaly {'Yes' if is_anom else 'No'}; labels: {labels}. "
    if not ops_df.empty:
        ans += f"Ops depth range: {int(ops_df['depth_m'].min())}-{int(ops_df['depth_m'].max())} mMD. "
    if not mud_df.empty:
        ans += f"Mud density range: {mud_df['density_gcm3'].min():.2f}-{mud_df['density_gcm3'].max():.2f} g/cm³. "
    st.session_state.chat.append({"role": "assistant", "content": ans})
    with st.chat_message("assistant"):
        st.markdown(ans)

st.caption("Dates parsed from report headers or inferred from filenames. 3D agent uses Survey + Daily Report from the sidebar.")