"""Generate a static HTML report from the tweet-classification CSV."""

import csv
import math
from collections import Counter
from datetime import datetime
from pathlib import Path
from urllib.parse import quote

from jinja2 import Template

# Colours per category (with a neutral fallback colour, DEFAULT_COLOR).
CATEGORY_COLORS = {
    "Cyberharcèlement": "#8e44ad",
    "Menace": "#c0392b",
    "Insulte": "#d35400",
    "Harcèlement": "#e74c3c",
    "Non-harcèlement": "#27ae60",
    "Sans_Texte": "#7f8c8d",
    "Inclassable": "#95a5a6",
    "Non-classifié": "#bdc3c7",
}
DEFAULT_COLOR = "#34495e"

TEMPLATE = """ Rapport de classification

Rapport de classification des tweets

Généré le {{ generated_at }} — {{ items|length }} élément(s)

{{ items|length }}
Total
{% for cat, count in category_counts %}
{{ count }}
{{ cat }}
{% endfor %}
{% for cat, count in category_counts %} {% endfor %}
"""


class WebReportGenerator:
    """Build an HTML report from the rows of a classification CSV.

    Only rows whose ``status`` column equals ``"processed"`` are reported.
    The report is written into ``output_dir`` and image links are expressed
    relative to that directory.
    """

    def __init__(self, csv_path: Path, output_dir: Path = Path("captures/ok")):
        """Store the resolved CSV and output locations.

        Args:
            csv_path: Location of the CSV file to read.
            output_dir: Directory that receives the report.
        """
        self.csv_path = Path(csv_path).resolve()
        self.output_dir = Path(output_dir).resolve()

        # If the CSV does not exist where stated, look for a file with the
        # same name inside output_dir.
        if not self.csv_path.exists():
            potential_path = self.output_dir / self.csv_path.name
            if potential_path.exists():
                self.csv_path = potential_path

    def _relative_to_output(self, path: Path):
        """Return *path* relative to ``output_dir``, or ``None`` if outside."""
        variants = [path]
        # A path recorded relative to the working directory can never match
        # the absolute output_dir as written, so an existing file is also
        # tried in its resolved form.
        if path.exists():
            variants.append(path.resolve())
        for variant in variants:
            try:
                return variant.relative_to(self.output_dir)
            except ValueError:
                continue
        return None

    def _resolve_relative_path(self, row: dict) -> str:
        """Return the URL-encoded image path relative to the HTML report.

        The report lives in ``output_dir``. If the recorded path does not
        exist (e.g. a CSV written before the images were moved), the expected
        locations ``output_dir/category/file`` and ``output_dir/file`` are
        tried in that order.

        Args:
            row: CSV row; reads ``filepath`` and ``detected_category``.

        Returns:
            The percent-encoded relative path with ``/`` separators, or an
            empty string when the row carries no file path.
        """
        raw_path = row.get('filepath')
        if not raw_path:
            return ""

        image_path = Path(raw_path)
        category = row.get('detected_category') or ''

        candidates = [image_path]
        if category:
            candidates.append(self.output_dir / category / image_path.name)
        candidates.append(self.output_dir / image_path.name)
        chosen = next((c for c in candidates if c.exists()), image_path)

        relative = self._relative_to_output(chosen)
        if relative is None:
            # Fallback: category/file, otherwise just the file name.
            if category:
                relative = Path(category) / image_path.name
            else:
                relative = Path(image_path.name)

        # URL-encode (spaces, typographic apostrophes, accents...) while
        # keeping the folder separators intact.
        return quote(relative.as_posix())

    @staticmethod
    def _confidence_fields(raw_value: str) -> dict:
        """Derive the display fields for a raw confidence value.

        Args:
            raw_value: Confidence as read from the CSV; may be ``None``.

        Returns:
            A dict with ``confidence_value`` (float), ``confidence_pct``
            (rounded percentage) and ``conf_class`` (``conf-high`` from 60,
            ``conf-mid`` from 35, otherwise ``conf-low``). Missing,
            unparsable or non-finite input counts as 0.0.
        """
        try:
            value = float(raw_value)
        except (TypeError, ValueError):
            value = 0.0
        # round() raises on NaN and infinity; this also covers a finite
        # value whose percentage overflows to infinity.
        if not math.isfinite(value * 100):
            value = 0.0

        pct = round(value * 100)
        if pct >= 60:
            conf_class = "conf-high"
        elif pct >= 35:
            conf_class = "conf-mid"
        else:
            conf_class = "conf-low"
        return {
            "confidence_value": value,
            "confidence_pct": pct,
            "conf_class": conf_class,
        }

    def generate(self) -> Path:
        """Render the report and write it to ``output_dir``.

        A missing CSV produces an empty report rather than an error.

        Returns:
            Path of the written ``report_<timestamp>.html`` file.
        """
        items = []
        if self.csv_path.exists():
            # newline='' is what the csv module expects; utf-8-sig reads
            # plain UTF-8 and also drops a leading BOM that would otherwise
            # end up in the first column name.
            with open(self.csv_path, 'r', encoding='utf-8-sig', newline='') as f:
                for row in csv.DictReader(f):
                    if row.get('status') != 'processed':
                        continue
                    # Short rows yield None here; a None key would break
                    # the name-based sort of the category counts below.
                    row['detected_category'] = row.get('detected_category') or ''
                    row['relative_filepath'] = self._resolve_relative_path(row)
                    row.update(self._confidence_fields(row.get('confidence')))
                    items.append(row)

        # Default order: descending confidence.
        items.sort(key=lambda r: r['confidence_value'], reverse=True)

        category_counts = Counter(item['detected_category'] for item in items)
        # Categories by descending count, then by name.
        sorted_counts = sorted(category_counts.items(), key=lambda kv: (-kv[1], kv[0]))

        self.output_dir.mkdir(parents=True, exist_ok=True)
        # One clock reading so the file name and the displayed date agree.
        now = datetime.now()
        output_path = self.output_dir / f"report_{now.strftime('%Y%m%d_%H%M%S')}.html"

        # Values come from the CSV and are rendered into HTML, so they are
        # escaped on output.
        template = Template(TEMPLATE, autoescape=True)
        html = template.render(
            items=items,
            category_counts=sorted_counts,
            category_colors=CATEGORY_COLORS,
            default_color=DEFAULT_COLOR,
            generated_at=now.strftime("%d/%m/%Y à %H:%M"),
        )

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)

        print(f"Rapport généré : {output_path}")
        return output_path