4: debugging flag

This commit is contained in:
Bora 2026-01-01 11:28:57 +01:00
parent 8d1b45ce73
commit cfc74cc3f2
3 changed files with 157 additions and 88 deletions

176
app.py
View File

@ -15,6 +15,24 @@ from config import Config
from inference import InferenceWorker
from manager import CameraManager
# ------------------------------------------------------------------------------
# 1. USER CONFIGURATION (Edit these values here)
# ------------------------------------------------------------------------------
# Enable verbose debug logs (True = verbose, False = quiet/crucial only)
DEBUG_LOG = False
# Rate Limiting: How many seconds to wait between detections per camera
DETECTION_INTERVAL = 10
# Frame Quality Threshold: Skip images with standard deviation lower than this.
# - Low values (1-5) allow darker/low-contrast images (good for night).
# - High values (20-40) filter out gray/blank screens but might skip valid dark images.
# - Set to 0 to disable this check entirely.
FRAME_STD_THRESHOLD = 5.0
# ------------------------------------------------------------------------------
def _cfg(*names, default=None):
"""Return first matching attribute from Config, else default."""
@ -24,7 +42,8 @@ def _cfg(*names, default=None):
return default
LOG_LEVEL = _cfg("LOG_LEVEL", "LOGLEVEL", default=logging.INFO)
# --- Logging setup ---
LOG_LEVEL = logging.DEBUG if DEBUG_LOG else _cfg("LOG_LEVEL", "LOGLEVEL", default=logging.INFO)
logging.basicConfig(
level=LOG_LEVEL,
format='%(asctime)s [%(levelname)s] %(message)s',
@ -34,12 +53,12 @@ logger = logging.getLogger(__name__)
app = Flask(__name__)
# --- Initialize Components ---
# --- Initialize components ---
camera_manager = CameraManager()
inference_worker = InferenceWorker()
inference_worker = InferenceWorker(debug_log=DEBUG_LOG)
inference_worker.start()
# --- MQTT Setup ---
# --- MQTT setup ---
mqtt_client = mqtt.Client()
MQTT_USERNAME = _cfg("MQTT_USERNAME", "MQTTUSERNAME", default=None)
@ -59,7 +78,45 @@ except Exception as e:
logger.error("Failed to connect to MQTT Broker: %s", e)
# --- Helper Functions (UI Only) ---
# --- Helpers ---
_last_log = {}
def log_rl(level, key, msg, every_s=10):
"""Rate-limited log. Use for noisy conditions."""
now = time.time()
last = _last_log.get(key, 0.0)
if now - last >= every_s:
_last_log[key] = now
logger.log(level, msg)
def log_debug(key, msg, every_s=0):
"""Debug-only logging with optional rate limiting."""
if not DEBUG_LOG:
return
if every_s and every_s > 0:
log_rl(logging.DEBUG, key, msg, every_s=every_s)
else:
logger.debug(msg)
def log_condition(camera_id: str, cond_key: str, msg: str, *, crucial=False,
debug_level=logging.DEBUG, debug_every=5,
nodebug_level=logging.WARNING, nodebug_every=60):
"""Log conditions (skip reasons, degraded state) without spamming.
- If DEBUG_LOG=True -> frequent detailed logs.
- If DEBUG_LOG=False -> only rate-limited warnings for crucial conditions.
"""
key = f"{camera_id}:{cond_key}"
if DEBUG_LOG:
log_rl(debug_level, key, msg, every_s=debug_every)
return
if crucial:
log_rl(nodebug_level, key, msg, every_s=nodebug_every)
def crop_image_for_ui(image, roi_list, scaleX, scaleY):
"""Helper for the /crop endpoint (UI preview only)."""
cropped_images = []
@ -89,6 +146,7 @@ def publish_detected_number(camera_id, detected_number, confidence=None):
try:
mqtt_client.publish(topic, payload)
# Keep this INFO even when debug is off: it's the primary business output.
log_msg = f"Published to {topic}: {detected_number}"
if confidence is not None:
log_msg += f" (Conf: {confidence:.2f})"
@ -97,43 +155,31 @@ def publish_detected_number(camera_id, detected_number, confidence=None):
logger.error("MQTT Publish failed: %s", e)
# --- Debug helpers ---
_last_log = {}
def log_rl(level, key, msg, every_s=10):
now = time.time()
last = _last_log.get(key, 0.0)
if now - last >= every_s:
_last_log[key] = now
logger.log(level, msg)
# --- Main Processing Loop (Refactored) ---
# --- Main processing loop ---
last_processed_time = {}
def process_all_cameras():
"""Revised loop with rate limiting + debug instrumentation."""
DETECTION_INTERVAL = int(_cfg("DETECTION_INTERVAL", default=10))
hb_last = 0.0
while True:
try:
# Heartbeat (proves loop is alive even when no publishes happen)
now = time.time()
if now - hb_last >= 5.0:
hb_last = now
in_q = getattr(inference_worker, "input_queue", None)
out_q = getattr(inference_worker, "result_queue", None)
logger.info(
"HB mainloop alive; in_q=%s out_q=%s dropped=%s processed=%s last_invoke_s=%s",
(in_q.qsize() if in_q else "n/a"),
(out_q.qsize() if out_q else "n/a"),
getattr(inference_worker, "dropped_tasks", "n/a"),
getattr(inference_worker, "processed_tasks", "n/a"),
getattr(inference_worker, "last_invoke_secs", "n/a"),
)
# Heartbeat only in debug mode
if DEBUG_LOG:
now = time.time()
if now - hb_last >= 5.0:
hb_last = now
in_q = getattr(inference_worker, "input_queue", None)
out_q = getattr(inference_worker, "result_queue", None)
logger.info(
"HB mainloop alive; in_q=%s out_q=%s dropped=%s processed=%s last_invoke_s=%s",
(in_q.qsize() if in_q else "n/a"),
(out_q.qsize() if out_q else "n/a"),
getattr(inference_worker, "dropped_tasks", "n/a"),
getattr(inference_worker, "processed_tasks", "n/a"),
getattr(inference_worker, "last_invoke_secs", "n/a"),
)
# --- Part 1: Process Results ---
# --- Part 1: process results ---
while True:
result = inference_worker.get_result()
if not result:
@ -141,11 +187,10 @@ def process_all_cameras():
cam_id = result.get('camera_id')
# End-to-end latency tracing
task_ts = result.get("task_ts")
if task_ts is not None:
# Debug-only latency trace
if DEBUG_LOG and result.get("task_ts") is not None:
try:
age = time.time() - float(task_ts)
age = time.time() - float(result["task_ts"])
logger.info(
"Result cam=%s type=%s task_id=%s age_s=%.3f timing=%s",
cam_id,
@ -162,11 +207,20 @@ def process_all_cameras():
conf = result.get('confidence')
camera_manager.results[cam_id] = val
publish_detected_number(cam_id, val, conf)
elif result.get('type') == 'error':
msg = result.get('message', 'Unknown error')
logger.warning("[%s] Detection skipped: %s", cam_id, msg)
# --- Part 2: Feed Frames ---
# When debug is off, avoid spamming "Low confidence" messages.
if DEBUG_LOG:
logger.warning("[%s] Detection skipped: %s", cam_id, msg)
else:
# Crucial errors: rate-limited warnings.
# Filter out "Low confidence" unless it's crucial for you.
if not str(msg).lower().startswith("low confidence"):
log_condition(cam_id, "detect_error", f"[{cam_id}] Detection skipped: {msg}", crucial=True)
# --- Part 2: feed frames ---
camera_manager.load_roi_config()
for camera_id, camera_data in camera_manager.cameras.items():
@ -177,45 +231,40 @@ def process_all_cameras():
last_time = last_processed_time.get(camera_id, 0.0)
if current_time - last_time < DETECTION_INTERVAL:
log_rl(
logging.DEBUG,
f"{camera_id}:rate",
f"[{camera_id}] skip: rate limit ({current_time - last_time:.2f}s<{DETECTION_INTERVAL}s)",
every_s=30,
)
log_debug(f"{camera_id}:rate", f"[{camera_id}] skip: rate limit", every_s=30)
continue
stream = camera_data.get("stream")
if not stream:
log_rl(logging.WARNING, f"{camera_id}:nostream", f"[{camera_id}] skip: no stream", every_s=10)
log_condition(camera_id, "nostream", f"[{camera_id}] skip: no stream", crucial=True)
continue
# Warmup check
start_time = getattr(stream, "start_time", getattr(stream, "starttime", None))
if start_time is not None and (current_time - start_time) < 5:
log_rl(logging.DEBUG, f"{camera_id}:warmup", f"[{camera_id}] skip: warmup", every_s=10)
log_debug(f"{camera_id}:warmup", f"[{camera_id}] skip: warmup", every_s=10)
continue
frame = stream.read()
if frame is None:
log_rl(logging.WARNING, f"{camera_id}:noframe", f"[{camera_id}] skip: frame is None", every_s=5)
log_condition(camera_id, "noframe", f"[{camera_id}] skip: frame is None", crucial=True)
continue
# STD Check
frame_std = float(np.std(frame))
if frame_std < 5:
log_rl(
logging.INFO,
f"{camera_id}:lowstd",
f"[{camera_id}] skip: low frame std={frame_std:.2f} (<10) (disturbed/blank/frozen?)",
every_s=5,
if frame_std < FRAME_STD_THRESHOLD:
log_condition(
camera_id,
"lowstd",
f"[{camera_id}] skip: low frame std={frame_std:.2f} (<{FRAME_STD_THRESHOLD})",
crucial=True,
debug_every=5,
nodebug_every=60,
)
mqtt_client.publish(f"{Config.MQTT_TOPIC}/{camera_id}/status", "disturbed")
continue
roi_list = camera_manager.rois.get(camera_id, [])
if not roi_list:
log_rl(logging.WARNING, f"{camera_id}:norois", f"[{camera_id}] skip: no ROIs", every_s=30)
log_condition(camera_id, "norois", f"[{camera_id}] skip: no ROIs configured", crucial=True)
continue
inference_worker.add_task(camera_id, roi_list, frame, frame_std=frame_std)
@ -229,7 +278,7 @@ def process_all_cameras():
time.sleep(5)
# --- Flask Routes ---
# --- Flask routes ---
@app.route('/')
def index():
return render_template('index.html')
@ -405,7 +454,8 @@ def detect_digits():
if p['confidence'] < CONFIDENCE_THRESHOLD:
msg = f"Digit {i} ('{p['digit']}') rejected: conf {p['confidence']:.2f} < {CONFIDENCE_THRESHOLD}"
rejected_reasons.append(msg)
logger.warning("[Manual] %s", msg)
if DEBUG_LOG:
logger.warning("[Manual] %s", msg)
else:
valid_digits_str.append(p['digit'])
confidences.append(p['confidence'])
@ -419,7 +469,8 @@ def detect_digits():
if not (MIN_VALUE <= final_number <= MAX_VALUE):
msg = f"Value {final_number} out of range ({MIN_VALUE}-{MAX_VALUE})"
logger.warning("[Manual] %s", msg)
if DEBUG_LOG:
logger.warning("[Manual] %s", msg)
return jsonify({'error': 'Value out of range', 'value': final_number}), 400
avg_conf = float(np.mean(confidences)) if confidences else None
@ -448,7 +499,6 @@ def update_camera_config():
return jsonify({"success": success})
# --- Main ---
if __name__ == '__main__':
t = threading.Thread(target=process_all_cameras, daemon=True)
t.start()

View File

@ -9,8 +9,22 @@ import tflite_runtime.interpreter as tflite
from config import Config
logger = logging.getLogger(__name__)
# ------------------------------------------------------------------------------
# 1. USER CONFIGURATION (Edit these values here)
# ------------------------------------------------------------------------------
# Minimum confidence (0-1) to accept a digit.
# - Higher (0.85-0.90) reduces false positives like "1010" from noise.
# - Lower (0.70-0.75) helps with weak/dark digits.
CONFIDENCE_THRESHOLD = 0.1
# Minimum and Maximum expected values for the number.
MIN_VALUE = 5
MAX_VALUE = 100
# ------------------------------------------------------------------------------
logger = logging.getLogger(__name__)
def _cfg(*names, default=None):
for n in names:
@ -20,7 +34,9 @@ def _cfg(*names, default=None):
class InferenceWorker:
def __init__(self):
def __init__(self, debug_log: bool = False):
self.debug_log = bool(debug_log)
self.input_queue = queue.Queue(maxsize=10)
self.result_queue = queue.Queue()
self.running = False
@ -36,10 +52,10 @@ class InferenceWorker:
self.processed_tasks = 0
self.last_invoke_secs = None
# Validation thresholds
self.CONFIDENCE_THRESHOLD = 0.10
self.MIN_VALUE = 5
self.MAX_VALUE = 100
# Set thresholds from top-level variables
self.CONFIDENCE_THRESHOLD = CONFIDENCE_THRESHOLD
self.MIN_VALUE = MIN_VALUE
self.MAX_VALUE = MAX_VALUE
self.load_model()
@ -55,7 +71,8 @@ class InferenceWorker:
self.output_details = self.interpreter.get_output_details()
self.original_input_shape = self.input_details[0]['shape']
logger.info("Model loaded. Default input shape: %s", self.original_input_shape)
if self.debug_log:
logger.info("Model loaded. Default input shape: %s", self.original_input_shape)
except Exception as e:
logger.critical("Failed to load TFLite model: %s", e)
@ -101,11 +118,9 @@ class InferenceWorker:
return None
def _put_result(self, d):
"""Best-effort put so failures never go silent."""
try:
self.result_queue.put(d, block=False)
except Exception:
# Should be extremely rare; log + drop
logger.exception("Failed to enqueue result")
def _worker_loop(self):
@ -121,18 +136,22 @@ class InferenceWorker:
task_id = task.get('task_id')
task_ts = task.get('timestamp')
try:
age_s = (time.time() - task_ts) if task_ts else None
logger.info(
"Worker got task cam=%s task_id=%s age_s=%s frame_std=%s rois=%d in_q=%d",
cam_id,
task_id,
(f"{age_s:.3f}" if age_s is not None else "n/a"),
task.get('frame_std'),
len(rois) if rois else 0,
self.input_queue.qsize(),
)
if self.debug_log:
try:
age_s = (time.time() - task_ts) if task_ts else None
logger.info(
"Worker got task cam=%s task_id=%s age_s=%s frame_std=%s rois=%d in_q=%d",
cam_id,
task_id,
(f"{age_s:.3f}" if age_s is not None else "n/a"),
task.get('frame_std'),
len(rois) if rois else 0,
self.input_queue.qsize(),
)
except Exception:
pass
try:
t0 = time.time()
crops = self._crop_rois(frame, rois)
t_crop = time.time()
@ -213,6 +232,7 @@ class InferenceWorker:
'task_ts': task_ts,
'timing_s': {'crop': t_crop - t0, 'predict': t_pred - t_crop, 'total': t_pred - t0},
})
self.processed_tasks += 1
else:
self._put_result({
'type': 'error',
@ -224,8 +244,6 @@ class InferenceWorker:
'timing_s': {'crop': t_crop - t0, 'predict': t_pred - t_crop, 'total': t_pred - t0},
})
self.processed_tasks += 1
except Exception:
logger.exception("Inference error cam=%s task_id=%s", cam_id, task_id)
self._put_result({
@ -275,7 +293,7 @@ class InferenceWorker:
input_tensor = np.array(batch_input)
# NOTE: Keeping original behavior (resize+allocate) but timing it.
# Keep current behavior (resize+allocate per batch). Debug timing is optional.
self.interpreter.resize_tensor_input(input_index, [num_images, target_h, target_w, 3])
self.interpreter.allocate_tensors()
@ -284,7 +302,8 @@ class InferenceWorker:
t0 = time.time()
self.interpreter.invoke()
self.last_invoke_secs = time.time() - t0
if self.last_invoke_secs > 1.0:
if self.debug_log and self.last_invoke_secs and self.last_invoke_secs > 1.0:
logger.warning("Slow invoke: %.3fs (batch=%d)", self.last_invoke_secs, num_images)
output_data = self.interpreter.get_tensor(output_index)
@ -293,7 +312,7 @@ class InferenceWorker:
for i in range(num_images):
logits = output_data[i]
# More stable softmax
# Numerically stable softmax
logits = logits - np.max(logits)
ex = np.exp(logits)
denom = np.sum(ex)