TangibleResearch · Reboy20000 · May 26, 2026 · May 25, 2026
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+__pycache__/
+*.py[cod]
+.pytest_cache/
+.DS_Store
+*.egg-info/
+dist/
+build/
+.venv/
+venv/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
diff --git a/Halgorithem/__init__.py b/Halgorithem/__init__.py
@@ -1,3 +1,4 @@
 from .core import Halgorithm
+from .main import HalgorithemVerifier
 
-__all__ = ["Halgorithm"]
+__all__ = ["Halgorithm", "HalgorithemVerifier"]
diff --git a/Halgorithem/__pycache__/__init__.cpython-312.pyc b/Halgorithem/__pycache__/__init__.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/claim_extraction.cpython-312.pyc b/Halgorithem/__pycache__/claim_extraction.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/confidence.cpython-312.pyc b/Halgorithem/__pycache__/confidence.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/contradiction.cpython-312.pyc b/Halgorithem/__pycache__/contradiction.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/core.cpython-312.pyc b/Halgorithem/__pycache__/core.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/evidence.cpython-312.pyc b/Halgorithem/__pycache__/evidence.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/math_utils.cpython-312.pyc b/Halgorithem/__pycache__/math_utils.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/nlp.cpython-312.pyc b/Halgorithem/__pycache__/nlp.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/retrieval.cpython-312.pyc b/Halgorithem/__pycache__/retrieval.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/source_quality.cpython-312.pyc b/Halgorithem/__pycache__/source_quality.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/temporal.cpython-312.pyc b/Halgorithem/__pycache__/temporal.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/text_processing.cpython-312.pyc b/Halgorithem/__pycache__/text_processing.cpython-312.pyc
diff --git a/Halgorithem/__pycache__/web.cpython-312.pyc b/Halgorithem/__pycache__/web.cpython-312.pyc
diff --git a/Halgorithem/checks/__init__.py b/Halgorithem/checks/__init__.py
@@ -0,0 +1,5 @@
+from .atomic import atomic_claim_nli
+from .nli import sentence_nli
+from .similarity import similarity_search
+
+__all__ = ["atomic_claim_nli", "sentence_nli", "similarity_search"]
diff --git a/Halgorithem/checks/atomic.py b/Halgorithem/checks/atomic.py
@@ -0,0 +1,56 @@
+from .nli import NLIModel
+from .utils import token_set
+from ..models import AtomicCheck, AtomicClaimResult, IngestedDocument, ProcessedSentence
+
+
+def claim_text(claim):
+    """Return the claim's own text, or its "subject relation object" string.
+
+    ``claim.text`` is returned whenever it is truthy; otherwise the three
+    triple fields are joined with single spaces and the result is stripped.
+    """
+    literal = claim.text
+    if literal:
+        return literal
+    triple = f"{claim.subject} {claim.relation} {claim.object}"
+    return triple.strip()
+
+
+def claim_overlap(left, right):
+    """Return the share of ``left``'s tokens that also occur in ``right``.
+
+    Each claim is tokenized from its "subject relation object" string; when
+    that string is blank after stripping, ``claim_text`` supplies the text
+    instead. The ratio is asymmetric (it divides by the number of ``left``
+    tokens) and is 0.0 when ``left`` produces no tokens.
+    """
+
+    def comparable_text(claim):
+        triple = f"{claim.subject} {claim.relation} {claim.object}".strip()
+        return triple if triple else claim_text(claim)
+
+    left_source = comparable_text(left)
+    right_source = comparable_text(right)
+    reference = token_set(left_source)
+    candidate = token_set(right_source)
+    if reference:
+        return len(reference & candidate) / len(reference)
+    return 0.0
+
+
+def atomic_claim_nli(ai_sentence: ProcessedSentence, document: IngestedDocument, *, nli_model=None):
+    """Check each atomic claim of an AI sentence against its closest document claim.
+
+    For every claim in ``ai_sentence.claims`` the document claim with the
+    highest ``claim_overlap`` is used as the NLI premise and the AI claim as
+    the hypothesis. When the document has no claims, the AI claim is recorded
+    as ``NEUTRAL`` with confidence 0.5 and no evidence.
+
+    Args:
+        ai_sentence: Sentence whose ``claims`` are verified.
+        document: Document whose ``claims`` are searched for a premise.
+        nli_model: Object exposing ``predict(premise, hypothesis)`` returning
+            ``(verdict, confidence)``. A default ``NLIModel`` is created only
+            if a prediction is actually needed.
+
+    Returns:
+        An ``AtomicCheck`` holding the per-claim results, their aggregate
+        score from ``score_atomic_results``, and the evidence of the result
+        with the highest confidence ("" when there are no results).
+    """
+    document_claims = document.claims
+    results = []
+    for ai_claim in ai_sentence.claims:
+        hypothesis = claim_text(ai_claim)
+        if not document_claims:
+            results.append(AtomicClaimResult(claim=hypothesis, verdict="NEUTRAL", confidence=0.5))
+            continue
+        if not nli_model:
+            # Built lazily so that inputs with nothing to compare never pay
+            # for constructing the default model.
+            nli_model = NLIModel()
+        # max() returns the first claim with the highest overlap, which is the
+        # same claim a stable descending sort would place first, without
+        # sorting the whole list for every AI claim.
+        best_claim = max(document_claims, key=lambda doc_claim: claim_overlap(ai_claim, doc_claim))
+        premise = claim_text(best_claim)
+        verdict, confidence = nli_model.predict(premise, hypothesis)
+        results.append(
+            AtomicClaimResult(
+                claim=hypothesis,
+                verdict=verdict,
+                confidence=confidence,
+                evidence=premise,
+            )
+        )
+
+    evidence = ""
+    if results:
+        evidence = max(results, key=lambda result: result.confidence).evidence
+    return AtomicCheck(claims=results, score=score_atomic_results(results), evidence=evidence)
+
+
+def score_atomic_results(results):
+    """Average the per-claim support scores, or return ``None`` for no results.
+
+    An ``ENTAIL`` result contributes its confidence, a ``CONTRADICT`` result
+    contributes ``1.0 - confidence``, and any other verdict contributes 0.5.
+    """
+    if not results:
+        return None
+
+    def support(result):
+        if result.verdict == "ENTAIL":
+            return result.confidence
+        if result.verdict == "CONTRADICT":
+            return 1.0 - result.confidence
+        return 0.5
+
+    per_claim = [support(result) for result in results]
+    return sum(per_claim) / len(per_claim)
diff --git a/Halgorithem/checks/nli.py b/Halgorithem/checks/nli.py
@@ -0,0 +1,115 @@
+import os
+import warnings
+
+import torch
+
+from .similarity import similarity_search
+from .units import normalize_units, unit_representation_mismatch
+from .utils import clamp, overlap_ratio
+from ..contradiction import find_contradiction
+from ..models import IngestedDocument, NLICheck, ProcessedSentence
+from ..text_processing import extract_numbers, has_negation_mismatch
+
+
+class NLIModel:
+    """NLI predictor backed by a ``transformers`` classifier with a rule-based fallback.
+
+    After construction ``kind`` is either "deberta-nli" (model loaded) or
+    "rule" (deterministic ``rule_nli`` is used), and ``fallback_reason`` holds
+    the load error text when the model could not be loaded.
+    """
+
+    def __init__(self, model_name=None):
+        """Load the requested model, or switch to the rule backend.
+
+        ``model_name`` falls back to the ``HALGORITHEM_NLI_MODEL`` environment
+        variable and then to "cross-encoder/nli-deberta-v3-large".
+        """
+        self.model_name = model_name or os.getenv("HALGORITHEM_NLI_MODEL", "cross-encoder/nli-deberta-v3-large")
+        self.kind = "deberta-nli"
+        self.fallback_reason = None
+        # These names explicitly select the deterministic backend; nothing is loaded.
+        if self.model_name.lower() in {"rule", "local", "deterministic"}:
+            self.kind = "rule"
+            self.model_name = "rule"
+            self.tokenizer = None
+            self.model = None
+            return
+        try:
+            # Imported here so the rule backend works without transformers installed.
+            from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+            # Only local files are used unless HALGORITHEM_ALLOW_MODEL_DOWNLOAD is "1", "true" or "yes".
+            allow_download = os.getenv("HALGORITHEM_ALLOW_MODEL_DOWNLOAD", "").lower() in {"1", "true", "yes"}
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, local_files_only=not allow_download)
+            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name, local_files_only=not allow_download)
+            self.model.eval()
+        except Exception as exc:
+            # Deliberate best-effort: any load failure degrades to the rule
+            # backend with a warning instead of propagating.
+            warnings.warn(
+                f"Could not load NLI model {self.model_name!r} ({exc}); using deterministic NLI fallback.",
+                RuntimeWarning,
+            )
+            self.kind = "rule"
+            self.fallback_reason = str(exc)
+            self.tokenizer = None
+            self.model = None
+
+    def predict(self, premise, hypothesis):
+        """Return ``(verdict, confidence)`` for the premise/hypothesis pair.
+
+        The verdict is "ENTAIL", "CONTRADICT" or "NEUTRAL". With a loaded
+        model the confidence is the softmax probability of the top label;
+        without one the result comes from ``rule_nli``.
+        """
+        if self.model is None:
+            return rule_nli(premise, hypothesis)
+        # Inputs longer than 512 tokens are truncated by the tokenizer.
+        inputs = self.tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=512)
+        with torch.no_grad():
+            logits = self.model(**inputs).logits[0]
+        probs = torch.softmax(logits, dim=-1)
+        labels = self.model.config.id2label
+        best_idx = int(torch.argmax(probs).item())
+        # Unknown indices fall back to the index itself, which maps to NEUTRAL below.
+        raw_label = labels.get(best_idx, str(best_idx)).upper()
+        confidence = float(probs[best_idx].item())
+        # Substring matching maps the model's label text onto the three verdicts.
+        if "ENTAIL" in raw_label:
+            verdict = "ENTAIL"
+        elif "CONTRAD" in raw_label:
+            verdict = "CONTRADICT"
+        else:
+            verdict = "NEUTRAL"
+        return verdict, confidence
+
+    @property
+    def diagnostics(self):
+        """Return a dict naming the active backend, model name and fallback reason."""
+        return {"nli": self.kind, "nli_model": self.model_name if self.kind != "rule" else "rule", "nli_fallback_reason": self.fallback_reason}
+
+
+def rule_nli(premise, hypothesis):
+    """Deterministic NLI stand-in returning ``(verdict, confidence)``.
+
+    A contradiction reported by ``find_contradiction`` yields "CONTRADICT"
+    with a fixed confidence of 0.86. Otherwise the verdict depends on how
+    much of the hypothesis is covered by the premise (``overlap_ratio``):
+    at least 0.72 is "ENTAIL", anything lower is "NEUTRAL".
+    """
+    premise_chunk = {"text": premise, "numbers": extract_numbers(premise)}
+    conflict = find_contradiction(
+        claim=hypothesis,
+        chunk=premise_chunk,
+        extract_numbers=extract_numbers,
+        has_negation_mismatch=has_negation_mismatch,
+        score=1.0,
+        threshold=0.0,
+    )
+    if conflict:
+        return "CONTRADICT", 0.86
+
+    coverage = overlap_ratio(hypothesis, premise)
+    if coverage >= 0.72:
+        verdict, confidence = "ENTAIL", clamp(0.55 + coverage * 0.4)
+    elif coverage >= 0.35:
+        verdict, confidence = "NEUTRAL", clamp(0.50 + coverage * 0.25)
+    else:
+        verdict, confidence = "NEUTRAL", 0.62
+    return verdict, confidence
+
+
+def sentence_nli(ai_sentence: ProcessedSentence, document: IngestedDocument, *, nli_model=None, top_k=5):
+    """Run sentence-level NLI for an AI sentence against its best document matches.
+
+    The premise is the space-joined text of the similarity hits scoring at
+    least 0.4 and at least 75% of the best hit's score (the best hit alone
+    when none qualify). Premise and hypothesis are unit-normalized before
+    prediction; the reported evidence is always the best hit.
+
+    Returns:
+        An ``NLICheck``. With no similarity hits it is "NEUTRAL" with
+        confidence 0.0.
+    """
+    nli_model = nli_model or NLIModel()
+    ranked = similarity_search(ai_sentence, document, top_k=top_k).hits
+    if not ranked:
+        return NLICheck(verdict="NEUTRAL", confidence=0.0)
+
+    top_hit = ranked[0]
+    cutoff = top_hit.score * 0.75
+    supporting = [hit for hit in ranked if hit.score >= 0.4 and hit.score >= cutoff]
+    if not supporting:
+        supporting = [top_hit]
+
+    premise = " ".join(hit.sentence for hit in supporting)
+    hypothesis = ai_sentence.resolved_text
+    normalized_premise, premise_changes = normalize_units(premise)
+    normalized_hypothesis, hypothesis_changes = normalize_units(hypothesis)
+    # The mismatch check runs on the raw texts, not the normalized ones.
+    mismatch = unit_representation_mismatch(premise, hypothesis)
+    verdict, confidence = nli_model.predict(normalized_premise, normalized_hypothesis)
+
+    details = [mismatch] if mismatch else []
+    details += [{"source_change": change} for change in premise_changes]
+    details += [{"response_change": change} for change in hypothesis_changes]
+    # Both flags are driven by the same mismatch result.
+    flagged = bool(mismatch)
+    return NLICheck(
+        verdict=verdict,
+        confidence=confidence,
+        evidence=top_hit.sentence,
+        evidence_index=top_hit.sentence_index,
+        unit_mismatch=flagged,
+        unit_representation_change=flagged,
+        unit_details=details,
+    )
diff --git a/Halgorithem/checks/similarity.py b/Halgorithem/checks/similarity.py
@@ -0,0 +1,32 @@
+from .utils import clamp
+from ..model_runtime import default_embedder, default_reranker
+from ..models import IngestedDocument, ProcessedSentence, SimilarityCheck, SimilarityHit
+
+
+def similarity_search(ai_sentence: ProcessedSentence, document: IngestedDocument, *, embedder=None, reranker=None, top_k=5):
+    """Rank document sentences by similarity to an AI sentence.
+
+    Every document sentence is scored with the embedder (clamped to
+    [0, 1]); the ``max(20, top_k)`` best are handed to the reranker, whose
+    output becomes ``hits``. The first reranked hit supplies the headline
+    score, evidence and source fields.
+
+    Returns:
+        A ``SimilarityCheck``. Without a best hit the score is 0.0, evidence
+        and source are "", and ``source_quality`` is 0.65.
+    """
+    embedder = embedder or default_embedder()
+    reranker = reranker or default_reranker()
+    query_text = ai_sentence.resolved_text
+    query_vector = embedder.encode(query_text)
+
+    def to_hit(doc_sentence):
+        similarity = clamp(embedder.similarity(query_vector, doc_sentence.embedding))
+        return SimilarityHit(
+            sentence_index=doc_sentence.index,
+            sentence=doc_sentence.text,
+            score=similarity,
+            source=doc_sentence.source,
+            source_quality=doc_sentence.source_quality,
+        )
+
+    ranked = sorted(
+        (to_hit(doc_sentence) for doc_sentence in document.sentences),
+        key=lambda hit: hit.score,
+        reverse=True,
+    )
+    shortlist = ranked[:max(20, top_k)]
+    top_hits = reranker.rerank(query_text, shortlist, text_fn=lambda hit: hit.sentence, top_k=top_k)
+    best = top_hits[0] if top_hits else None
+    if not best:
+        return SimilarityCheck(score=0.0, evidence="", source="", source_quality=0.65, hits=top_hits)
+    return SimilarityCheck(
+        score=best.score,
+        evidence=best.sentence,
+        source=best.source,
+        source_quality=best.source_quality,
+        hits=top_hits,
+    )
diff --git a/Halgorithem/checks/units.py b/Halgorithem/checks/units.py
@@ -0,0 +1,106 @@
+import re
+
+
+# Maps every accepted unit spelling (looked up lowercased) to a canonical unit name.
+UNIT_ALIASES = {
+    "g": "gram",
+    "gram": "gram",
+    "grams": "gram",
+    "kg": "kilogram",
+    "kilogram": "kilogram",
+    "kilograms": "kilogram",
+    "m": "meter",
+    "meter": "meter",
+    "meters": "meter",
+    "km": "kilometer",
+    "kilometer": "kilometer",
+    "kilometers": "kilometer",
+    "mile": "mile",
+    "miles": "mile",
+    "c": "celsius",
+    "celsius": "celsius",
+    "f": "fahrenheit",
+    "fahrenheit": "fahrenheit",
+}
+
+# canonical unit -> (target unit, factor, offset); normalized = value * factor + offset.
+# Temperatures are not listed here; normalized_quantity converts them itself.
+NORMALIZATION = {
+    "gram": ("kilogram", 0.001, 0.0),
+    "kilogram": ("kilogram", 1.0, 0.0),
+    "meter": ("meter", 1.0, 0.0),
+    "kilometer": ("meter", 1000.0, 0.0),
+    # The international mile is exactly 1609.344 m.
+    "mile": ("meter", 1609.344, 0.0),
+}
+
+# A non-negative decimal number followed by an alphabetic unit word.
+# NOTE(review): no sign is captured, so "-40 F" is read as 40 F — confirm
+# whether negative quantities need support.
+QUANTITY_RE = re.compile(r"\b(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>[A-Za-z]+)\b")
+
+
+def format_number(value):
+    """Render a number compactly.
+
+    Values within 1e-9 of an integer are printed as that integer; anything
+    else is printed with up to six decimals, trailing zeros removed.
+    """
+    nearest = round(value)
+    if abs(value - nearest) < 1e-9:
+        return str(int(nearest))
+    fixed = f"{value:.6f}"
+    return fixed.rstrip("0").rstrip(".")
+
+
+def normalized_quantity(value, unit):
+    """Convert a value/unit pair to ``(normalized_value, target_unit)``.
+
+    Returns ``None`` when the unit spelling is not in ``UNIT_ALIASES`` or has
+    no ``NORMALIZATION`` rule. Celsius passes through unchanged and
+    Fahrenheit is converted to Celsius; other units use their table factor
+    and offset.
+    """
+    canonical = UNIT_ALIASES.get(unit.lower())
+    if not canonical:
+        return None
+
+    if canonical in ("celsius", "fahrenheit"):
+        degrees = float(value)
+        if canonical == "fahrenheit":
+            degrees = (degrees - 32.0) * 5.0 / 9.0
+        return degrees, "celsius"
+
+    rule = NORMALIZATION.get(canonical)
+    if not rule:
+        return None
+    target_unit, factor, offset = rule
+    return float(value) * factor + offset, target_unit
+
+
+def normalize_units(sentence):
+    """Rewrite recognized quantities in ``sentence`` into normalized units.
+
+    Returns:
+        ``(rewritten_sentence, changes)``. ``changes`` has one dict per
+        quantity whose text actually changed (compared case-insensitively),
+        with keys "original", "normalized", "value" and "unit". Falsy input
+        is treated as an empty string.
+    """
+    recorded = []
+
+    def canonicalize(match):
+        found = match.group(0)
+        converted = normalized_quantity(match.group("value"), match.group("unit"))
+        if not converted:
+            # Unknown unit: leave the matched text untouched.
+            return found
+        value, unit = converted
+        rewritten = f"{format_number(value)} {unit}"
+        if found.lower() != rewritten.lower():
+            recorded.append(
+                {
+                    "original": found,
+                    "normalized": rewritten,
+                    "value": value,
+                    "unit": unit,
+                }
+            )
+        return rewritten
+
+    text = sentence or ""
+    return QUANTITY_RE.sub(canonicalize, text), recorded
+
+
+def unit_representation_mismatch(left, right, tolerance=0.03):
+    """Find a quantity written differently in ``left`` and ``right`` but equal in value.
+
+    Quantities from both texts are normalized; a pair qualifies when the
+    normalized units match, the original texts differ (case-insensitively),
+    and the values agree within ``tolerance`` relative to the ``right`` value.
+    When the ``right`` value is zero the relative difference is undefined, so
+    the pair qualifies only if the ``left`` value is zero as well (for
+    example "0 C" against "32 F").
+
+    Args:
+        left: Source text; falsy input is treated as "".
+        right: Response text; falsy input is treated as "".
+        tolerance: Maximum relative difference still counted as equal.
+
+    Returns:
+        A dict with keys "source", "response" and "normalized" for the first
+        qualifying pair, or ``None`` when there is none.
+    """
+
+    def quantities(text):
+        # Normalize each match once and keep only recognized units.
+        found = []
+        for match in QUANTITY_RE.finditer(text or ""):
+            normalized = normalized_quantity(match.group("value"), match.group("unit"))
+            if normalized:
+                found.append((match.group(0), *normalized))
+        return found
+
+    left_quantities = quantities(left)
+    right_quantities = quantities(right)
+    for left_original, left_value, left_unit in left_quantities:
+        for right_original, right_value, right_unit in right_quantities:
+            if left_unit != right_unit or left_original.lower() == right_original.lower():
+                continue
+            if right_value == 0:
+                # Avoid dividing by zero; two zero values are still the same quantity.
+                same_value = left_value == 0
+            else:
+                same_value = abs(left_value - right_value) / abs(right_value) <= tolerance
+            if same_value:
+                return {
+                    "source": left_original,
+                    "response": right_original,
+                    "normalized": f"{format_number(left_value)} {left_unit}",
+                }
+    return None
diff --git a/Halgorithem/checks/utils.py b/Halgorithem/checks/utils.py
@@ -0,0 +1,17 @@
+import re
+
+
+def clamp(value, low=0.0, high=1.0):
+    """Convert ``value`` to float and limit it to the range ``[low, high]``."""
+    number = float(value)
+    capped = min(high, number)
+    return max(low, capped)
+
+
+def token_set(text):
+    """Return the distinct lowercase ``[a-z0-9]`` runs found in ``text``.
+
+    Falsy input (``None`` or "") yields an empty set.
+    """
+    lowered = (text or "").lower()
+    return {match.group(0) for match in re.finditer(r"[a-z0-9]+", lowered)}
+
+
+def overlap_ratio(left, right):
+    """Return the share of ``left``'s tokens (longer than two characters) found in ``right``.
+
+    Tokens come from ``token_set``; those of length two or less are ignored
+    on both sides. The result is 0.0 when ``left`` has no such tokens.
+    """
+
+    def long_tokens(text):
+        return {token for token in token_set(text) if len(token) > 2}
+
+    reference = long_tokens(left)
+    other = long_tokens(right)
+    if reference:
+        return len(reference & other) / len(reference)
+    return 0.0
diff --git a/Halgorithem/confidence.py b/Halgorithem/confidence.py
@@ -41,15 +41,27 @@ def classify_support(score, threshold=0.30, contradiction=None, unsupported_term
     unsupported_terms = unsupported_terms or []
     supported_threshold = max(threshold + 0.10, 0.40)
 
-    hard_contradiction = contradiction and contradiction.get("reason") in {
-        "Date mismatch", "Number mismatch", "Unit mismatch", "Negation mismatch"
+    numeric_or_logical_contradiction = contradiction and contradiction.get("reason") in {
+        "Date mismatch",
+        "Number mismatch",
+        "Unit mismatch",
+        "Negation mismatch",
     }
-    if hard_contradiction:
+    relation_contradiction = contradiction and contradiction.get("reason") in {
+        "Location mismatch",
+        "Entity-role mismatch",
+        "Source qualifier mismatch",
+    }
+    if contradiction and contradiction.get("reason") == "Number mismatch" and unsupported_terms:
+        return "HALLUCINATION"
+    if numeric_or_logical_contradiction:
         return "CONTRADICTION"
     if unsupported_terms and is_negative_claim(claim):
         return "UNVERIFIABLE_DENIAL"
     if unsupported_terms:
         return "HALLUCINATION"
+    if relation_contradiction:
+        return "CONTRADICTION"
     if contradiction:
         return "CONTRADICTION"
     if is_inferential_claim(claim) and score >= 0.08:
@@ -58,6 +70,8 @@ def classify_support(score, threshold=0.30, contradiction=None, unsupported_term
         return "SUPPORTED"
     if score >= threshold:
         return "WEAK_SUPPORT"
+    if is_negative_claim(claim):
+        return "UNVERIFIABLE_DENIAL"
     return "HALLUCINATION"