Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Halgorithem/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .core import Halgorithm
from .main import HalgorithemVerifier, verify, verify_urls

__all__ = ["Halgorithm"]
__all__ = ["Halgorithm", "HalgorithemVerifier", "verify", "verify_urls"]
1 change: 1 addition & 0 deletions Halgorithem/checks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Higher-level verification checks for Halgorithem."""
91 changes: 91 additions & 0 deletions Halgorithem/checks/atomic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import re
from functools import lru_cache

from ..claim_extraction import split_atomic_claims
from ..models import AtomicCheck, AtomicClaim


@lru_cache(maxsize=8192)
def _tokens(text):
return frozenset(t.lower() for t in re.findall(r"\b[a-zA-Z][a-zA-Z'-]+\b", text or "") if len(t) > 2)


def _overlap(left, right):
if not left:
return 0.0
return len(left & right) / len(left)


def _priority(verdict):
return {"CONTRADICT": 3, "ENTAIL": 2, "NEUTRAL": 1}.get(verdict, 0)


def _score_claim(verdict, confidence):
if verdict == "ENTAIL":
return confidence
if verdict == "CONTRADICT":
return -confidence
return 0.5 * confidence


def prepare_document_claims(document):
    """Split every document sentence into atomic claims paired with tokens.

    Each sentence's ``resolved_text`` is passed to ``split_atomic_claims``;
    when the splitter returns nothing, the whole resolved text is used as a
    single claim.

    Returns:
        A list of ``(claim, tokens)`` tuples in document order, where
        ``tokens`` is the result of ``_tokens(claim)``.
    """
    return [
        (claim, _tokens(claim))
        for sentence in document
        for claim in split_atomic_claims(sentence.resolved_text) or [sentence.resolved_text]
    ]


def atomic_claim_nli(processed_sentence, document, nli_model=None, doc_claims=None):
    """Judge each atomic claim of a sentence against its closest document claim.

    Args:
        processed_sentence: An object exposing ``resolved_text``; anything
            without that attribute is used as the text itself.
        document: Iterable of document sentences; only consulted (through
            ``prepare_document_claims``) when ``doc_claims`` is ``None``.
        nli_model: Optional object exposing
            ``predict_batch(premises, hypotheses)`` whose results carry
            ``label`` and ``score``.  When ``None``, a token-overlap
            heuristic decides the verdict instead.
        doc_claims: Optional precomputed ``(claim, tokens)`` pairs, as
            produced by ``prepare_document_claims``.

    Returns:
        ``AtomicCheck(status="no_document_claims")`` when there is nothing to
        compare against; otherwise an ``AtomicCheck`` with status ``"ok"``,
        the per-claim results sorted by verdict priority then confidence
        (both descending), and ``score`` set to the mean signed per-claim
        contribution (``None`` when no claim was evaluated).
    """
    ai_text = getattr(processed_sentence, "resolved_text", processed_sentence)
    ai_claims = split_atomic_claims(ai_text) or [ai_text]
    if doc_claims is None:
        doc_claims = prepare_document_claims(document)

    if not doc_claims:
        return AtomicCheck(status="no_document_claims")

    # Pair every claim with the document claim that covers the largest share
    # of its tokens; the claim's tokens travel with the pair so they are not
    # recomputed when the verdict is assigned.
    matched = []
    for claim in ai_claims:
        claim_tokens = _tokens(claim)
        best_claim, best_tokens = max(doc_claims, key=lambda pair: _overlap(claim_tokens, pair[1]))
        matched.append((claim, claim_tokens, best_claim, best_tokens))

    if nli_model is not None:
        # One batched call: the document claim is the premise, the sentence
        # claim is the hypothesis.
        nli_results = nli_model.predict_batch(
            [best_claim for _, _, best_claim, _ in matched],
            [claim for claim, _, _, _ in matched],
        )
    else:
        nli_results = [None] * len(matched)

    results = []
    for (claim, claim_tokens, best_claim, best_tokens), nli in zip(matched, nli_results):
        if not claim_tokens or not best_tokens:
            # Nothing comparable on one side: neutral, with no evidence.
            results.append(AtomicClaim(claim=claim, verdict="NEUTRAL", confidence=0.0, evidence=""))
            continue
        if nli is None:
            # No model verdict available: fall back to lexical overlap.
            overlap = _overlap(claim_tokens, best_tokens)
            verdict = "ENTAIL" if overlap >= 0.70 else "NEUTRAL"
            confidence = overlap if verdict == "ENTAIL" else 0.50
        elif nli.label == "CONTRADICTION":
            verdict = "CONTRADICT"
            confidence = nli.score
        elif nli.label == "ENTAILMENT":
            verdict = "ENTAIL"
            confidence = nli.score
        else:
            verdict = "NEUTRAL"
            confidence = nli.score
        results.append(AtomicClaim(claim=claim, verdict=verdict, confidence=confidence, evidence=best_claim))

    if results:
        weighted_sum = sum(_score_claim(item.verdict, item.confidence) for item in results)
        score = weighted_sum / len(results)
    else:
        score = None
    results.sort(key=lambda item: (_priority(item.verdict), item.confidence), reverse=True)
    return AtomicCheck(claims=results, score=score, status="ok")
101 changes: 101 additions & 0 deletions Halgorithem/checks/nli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import re
from functools import lru_cache

from ..contradiction import find_contradiction
from ..models import NLICheck
from ..text_processing import extract_numbers, has_negation_mismatch, lemmatize_tokens


@lru_cache(maxsize=8192)
def _tokens(text):
return frozenset(t.lower() for t in re.findall(r"\b[a-zA-Z][a-zA-Z'-]+\b", text or "") if len(t) > 2)


@lru_cache(maxsize=8192)
def _content_lemmas(text):
    """Return the set of lemmas of ``text`` that are longer than two characters.

    Lemmas come from ``lemmatize_tokens``; the result is cached per input.
    """
    kept = [lemma for lemma in lemmatize_tokens(text) if len(lemma) > 2]
    return frozenset(kept)


class NLIModel:
    """Default inference backend that delegates every pair to ``rule_nli``."""

    # Quality weight advertised by this backend.
    model_quality = 0.75

    def predict(self, premise, hypothesis):
        """Classify a single premise/hypothesis pair via ``predict_batch``."""
        batch = self.predict_batch([premise], [hypothesis])
        return batch[0]

    def predict_batch(self, premises, hypotheses):
        """Classify premise/hypothesis pairs position by position.

        Pairing stops at the shorter of the two sequences.
        """
        verdicts = []
        for premise, hypothesis in zip(premises, hypotheses):
            verdicts.append(rule_nli(premise, hypothesis))
        return verdicts


def rule_nli(premise, hypothesis):
    """Classify a premise/hypothesis pair with hand-written rules.

    The checks run in order and the first that applies decides the result:

    1. ``find_contradiction`` reports an issue -> ``CONTRADICTION`` (0.84),
       carrying the issue's ``reason``.
    2. The hypothesis has numbers but the premise has none -> ``NEUTRAL``
       (0.35).
    3. The hypothesis has numbers that the premise lacks -> ``NEUTRAL``
       (0.42).
    4. At least 70% of the hypothesis tokens (or lemmas, whichever ratio is
       higher) occur in the premise -> ``ENTAILMENT`` with a confidence of
       ``0.60 + 0.30 * overlap`` capped at 0.92.
    5. Otherwise -> ``NEUTRAL`` (0.50).

    Every result is an ``NLICheck`` with ``model_quality`` 0.75.
    """
    quality = 0.75

    contradiction = find_contradiction(
        claim=hypothesis,
        chunk={"text": premise or "", "numbers": extract_numbers(premise)},
        extract_numbers=extract_numbers,
        has_negation_mismatch=has_negation_mismatch,
        score=1.0,
        threshold=0.0,
    )
    if contradiction:
        reason = contradiction.get("reason", "Contradiction")
        return NLICheck("CONTRADICTION", 0.84, reason, model_quality=quality)

    premise_tokens = _tokens(premise)
    hypothesis_tokens = _tokens(hypothesis)
    premise_numbers = set(extract_numbers(premise))
    hypothesis_numbers = set(extract_numbers(hypothesis))

    if hypothesis_numbers:
        if not premise_numbers:
            return NLICheck("NEUTRAL", 0.35, "Missing number evidence", model_quality=quality)
        if not hypothesis_numbers.issubset(premise_numbers):
            return NLICheck("NEUTRAL", 0.42, "Number evidence differs", model_quality=quality)

    if hypothesis_tokens:
        token_overlap = len(premise_tokens & hypothesis_tokens) / len(hypothesis_tokens)
        premise_lemmas = _content_lemmas(premise)
        hypothesis_lemmas = _content_lemmas(hypothesis)
        if hypothesis_lemmas:
            lemma_overlap = len(premise_lemmas & hypothesis_lemmas) / len(hypothesis_lemmas)
        else:
            lemma_overlap = 0.0
        overlap = max(token_overlap, lemma_overlap)
        if overlap >= 0.70:
            confidence = min(0.60 + overlap * 0.30, 0.92)
            return NLICheck("ENTAILMENT", confidence, model_quality=quality)

    return NLICheck("NEUTRAL", 0.50, model_quality=quality)


def _hit_score(hit):
    """Return the retrieval score of a hit given as a dict or an object (0.0 when absent)."""
    if isinstance(hit, dict):
        return hit.get("score", 0.0)
    return getattr(hit, "score", 0.0)


def _hit_sentence(hit):
    """Return the premise text of a hit given as a dict or an object.

    A dict without a ``"sentence"`` key yields ``None``; an object without a
    ``sentence`` attribute is stringified.
    """
    if isinstance(hit, dict):
        return hit.get("sentence")
    return getattr(hit, "sentence", str(hit))


def sentence_nli(processed_sentence, document=None, nli_model=None, hits=None):
    """Run NLI for one sentence against its best retrieval hits.

    Args:
        processed_sentence: An object exposing ``resolved_text``; anything
            without that attribute is used (stringified if needed) as the
            claim itself.
        document: Optional sequence of document sentences.  Only used when no
            hits are supplied, in which case its first sentence becomes the
            sole premise with a score of 1.0.
        nli_model: Optional object exposing
            ``predict_batch(premises, hypotheses)``; defaults to ``NLIModel()``.
        hits: Optional retrieval hits, each a dict or an object carrying
            ``sentence`` and ``score``.

    Returns:
        The selected model result, or a neutral ``NLICheck`` (0.50) when no
        hit passes the score filter.  Selection order: an entailment scoring
        at least 0.82, else the strongest contradiction, else the strongest
        entailment, else the highest-scoring result.
    """
    model = nli_model or NLIModel()
    claim = getattr(processed_sentence, "resolved_text", processed_sentence)
    if not isinstance(claim, str):
        claim = str(claim)

    relevant_hits = hits or []
    if not relevant_hits and document is not None:
        relevant_hits = [{"sentence": s.resolved_text, "score": 1.0} for s in document[:1]]

    # Keep hits within 80% of the best score, and never below 0.30.
    best_hit_score = max((_hit_score(hit) for hit in relevant_hits), default=0.0)
    min_hit_score = max(0.30, best_hit_score * 0.80)

    # Only the first five hits are considered as premises.
    premises = []
    for hit in relevant_hits[:5]:
        if _hit_score(hit) < min_hit_score:
            continue
        premises.append(_hit_sentence(hit))
    if not premises:
        return NLICheck("NEUTRAL", 0.50, model_quality=getattr(model, "model_quality", 1.0))

    results = model.predict_batch(premises, [claim] * len(premises))

    def by_score(result):
        return result.score

    best_entailment = max((r for r in results if r.label == "ENTAILMENT"), key=by_score, default=None)
    best_contradiction = max((r for r in results if r.label == "CONTRADICTION"), key=by_score, default=None)

    # A confident entailment outranks any contradiction; a weak one does not.
    if best_entailment is not None and best_entailment.score >= 0.82:
        return best_entailment
    if best_contradiction is not None:
        return best_contradiction
    if best_entailment is not None:
        return best_entailment
    return max(results, key=by_score)
54 changes: 54 additions & 0 deletions Halgorithem/checks/similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import heapq

from ..models import SimilarityCheck, SimilarityHit
def _token_overlap(left_tokens, right_tokens):
left_tokens = {token for token in left_tokens if len(token) > 2}
if not left_tokens or not right_tokens:
return 0.0
return len(left_tokens & set(right_tokens)) / len(left_tokens)


def _similarities(embedder, left, rights):
if hasattr(embedder, "similarity_many"):
try:
return embedder.similarity_many(left, rights)
except Exception:
pass
return [float(embedder.similarity(left, right)) for right in rights]


def similarity_search(processed_sentence, document, embedder, top_k=5):
    """Rank document sentences by similarity to ``processed_sentence``.

    The raw embedding similarity of each document sentence is boosted by
    ``0.12 *`` the better of its token and lemma overlap with the query, plus
    0.05 when the query has numbers and all of them appear in the document
    sentence.  Each addition is capped at 1.0.

    Args:
        processed_sentence: Query sentence exposing ``embedding``,
            ``resolved_text``, ``tokens``, ``lemmas`` and ``numbers``.
        document: Document sentences exposing the same fields plus
            ``context_text``, ``source``, ``sentence_id`` and
            ``source_quality``.  It is iterated twice.
        embedder: Object exposing ``encode`` and ``similarity`` (and
            optionally ``similarity_many``).
        top_k: Number of best hits to keep.

    Returns:
        A ``SimilarityCheck`` holding the ``top_k`` highest-scoring hits, the
        best hit's score (0.0 without hits) and the highest source quality
        among the kept hits (0.55 without hits).
    """

    def embedding_for(sentence):
        # Reuse a precomputed embedding when the sentence carries one.
        cached = sentence.embedding
        if cached is not None:
            return cached
        return embedder.encode(sentence.resolved_text, convert_to_tensor=True)

    query_embedding = embedding_for(processed_sentence)
    doc_embeddings = [embedding_for(doc_sentence) for doc_sentence in document]
    raw_scores = _similarities(embedder, query_embedding, doc_embeddings)

    hits = []
    for doc_sentence, raw_score in zip(document, raw_scores):
        token_ratio = _token_overlap(processed_sentence.tokens, doc_sentence.tokens)
        lemma_ratio = _token_overlap(processed_sentence.lemmas, doc_sentence.lemmas)

        query_numbers = processed_sentence.numbers
        if query_numbers and query_numbers.issubset(doc_sentence.numbers):
            number_bonus = 0.05
        else:
            number_bonus = 0.0

        boosted = min(raw_score + 0.12 * max(token_ratio, lemma_ratio), 1.0)
        final_score = min(boosted + number_bonus, 1.0)
        hit = SimilarityHit(
            sentence=doc_sentence.context_text or doc_sentence.resolved_text,
            score=final_score,
            source=doc_sentence.source,
            sentence_id=doc_sentence.sentence_id,
            source_quality=doc_sentence.source_quality,
        )
        hits.append(hit)

    selected = heapq.nlargest(top_k, hits, key=lambda hit: hit.score)
    best_score = selected[0].score if selected else 0.0
    best_quality = max((hit.source_quality for hit in selected), default=0.55)
    return SimilarityCheck(score=best_score, hits=selected, source_quality=best_quality)
38 changes: 38 additions & 0 deletions Halgorithem/checks/units.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Maps every accepted spelling of a unit (abbreviation, singular, plural) to
# its canonical singular name.
UNIT_ALIASES = {
    alias: canonical
    for canonical, aliases in (
        ("gram", ("g", "gram", "grams")),
        ("kilogram", ("kg", "kilogram", "kilograms")),
        ("pound", ("lb", "lbs", "pound", "pounds")),
        ("meter", ("m", "meter", "meters")),
        ("centimeter", ("cm", "centimeter", "centimeters")),
        ("kilometer", ("km", "kilometer", "kilometers")),
        ("mile", ("mile", "miles")),
    )
    for alias in aliases
}

# Maps a canonical unit to (dimension, factor).  The factors are relative to
# the kilogram for mass and the meter for length, both of which carry 1.0.
NORMALIZATION = {
    unit: (dimension, factor)
    for dimension, factors in (
        ("mass", (("gram", 0.001), ("kilogram", 1.0), ("pound", 0.45359237))),
        (
            "length",
            (("meter", 1.0), ("centimeter", 0.01), ("kilometer", 1000.0), ("mile", 1609.344)),
        ),
    )
    for unit, factor in factors
}


def normalize_unit(unit):
    """Resolve a unit spelling to its ``(dimension, factor)`` entry.

    The lookup is case-insensitive.  Returns ``None`` when ``unit`` is falsy
    or is not a known alias.
    """
    key = (unit or "").lower()
    canonical = UNIT_ALIASES.get(key)
    if not canonical:
        return None
    return NORMALIZATION.get(canonical)
12 changes: 4 additions & 8 deletions Halgorithem/claim_extraction.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
import re

from .nlp import nlp
from .nlp import parse


CLAIM_SPLIT_RE = re.compile(r"\s*(?:;|\n+|\s+-\s+)\s*")
CONJUNCTION_RE = re.compile(
r"\s+(?:and|but|while|whereas)\s+"
r"(?=(?:[A-Z][a-z]+|\d|it\b|he\b|she\b|they\b|the\b|a\b|an\b))",
re.IGNORECASE,
)
CONJUNCTION_RE = re.compile(r"\s+(?:and|but|while|whereas)\s+", re.IGNORECASE)


def _has_factual_shape(text):
doc = nlp(text)
doc = parse(text)
has_subject = any(t.dep_ in {"nsubj", "nsubjpass"} for t in doc)
has_verb = any(t.pos_ in {"VERB", "AUX"} for t in doc)
has_anchor = any(doc.ents) or any(t.like_num for t in doc) or any(t.pos_ == "PROPN" for t in doc)
Expand All @@ -27,7 +23,7 @@ def _has_factual_shape(text):


def _has_event_verb(text):
    """Return True when the parsed ``text`` contains a ``VERB`` or ``AUX`` token."""
    # Parse once with ``parse``; the earlier ``nlp(text)`` result was
    # overwritten before it was ever read.
    doc = parse(text)
    return any(t.pos_ in {"VERB", "AUX"} for t in doc)


Expand Down
27 changes: 23 additions & 4 deletions Halgorithem/confidence.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from .nlp import parse


INFERENTIAL_TERMS = {
"helped",
"made",
Expand All @@ -9,6 +12,14 @@
"significant",
"influential",
}
# Lemmas that mark a claim as inferential when they match the lowercased
# lemma of the claim's ROOT token (see is_inferential_claim).
INFERENTIAL_ROOT_LEMMAS = {
    "help",
    "ease",
    "learn",
    "influence",
    "matter",
    "signify",
}

NEGATION_TERMS = {
"no",
Expand All @@ -28,8 +39,9 @@


def is_inferential_claim(claim):
    """Return True when the claim's root token has an inferential lemma.

    The claim is parsed with ``parse`` and the first token whose dependency
    label is ``ROOT`` is looked up, lowercased, in
    ``INFERENTIAL_ROOT_LEMMAS``.  A ``None`` claim is treated as empty text.
    Returns False when no root token exists.
    """
    # The earlier bag-of-words check returned before this logic could run;
    # only the root-lemma test is kept.
    doc = parse(claim or "")
    root = next((t for t in doc if t.dep_ == "ROOT"), None)
    return root is not None and root.lemma_.lower() in INFERENTIAL_ROOT_LEMMAS


def is_negative_claim(claim):
Expand All @@ -42,7 +54,14 @@ def classify_support(score, threshold=0.30, contradiction=None, unsupported_term
supported_threshold = max(threshold + 0.10, 0.40)

hard_contradiction = contradiction and contradiction.get("reason") in {
"Date mismatch", "Number mismatch", "Unit mismatch", "Negation mismatch"
"Date mismatch",
"Number mismatch",
"Unit mismatch",
"Negation mismatch",
"Entity-role mismatch",
"Location mismatch",
"Source qualifier mismatch",
"NLI contradiction",
}
if hard_contradiction:
return "CONTRADICTION"
Expand Down Expand Up @@ -71,6 +90,6 @@ def confidence_score(score, evidence_count=0, contradiction=None, unsupported_te
confidence = max(0.0, min(float(score), 1.0))
confidence += min(evidence_count, 3) * 0.04
confidence -= min(len(unsupported_terms), 4) * 0.06
if contradiction:
if contradiction and status in {"CONTRADICTION", "HALLUCINATION"}:
confidence += 0.10
return round(max(0.0, min(confidence, 1.0)), 3)
Loading
Loading