Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions src/Storages/HybridSegmentPruner.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#include <Storages/HybridSegmentPruner.h>

#include <Core/Range.h>
#include <DataTypes/IDataType.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/TreeRewriter.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Storages/ColumnsDescription.h>

namespace DB
{

namespace
{

/// Builds the AST of a `tuple(...)` function call whose arguments are identifiers for
/// `column_names`, in the given order. With no names the result is a `tuple` node with
/// an empty argument list.
ASTPtr makeIdentityKeyAST(const Names & column_names)
{
    /// Collect the identifier arguments first, then hang them on the function node.
    auto tuple_args = make_intrusive<ASTExpressionList>();
    for (const auto & column_name : column_names)
        tuple_args->children.push_back(make_intrusive<ASTIdentifier>(column_name));

    auto tuple_function = make_intrusive<ASTFunction>();
    tuple_function->name = "tuple";
    tuple_function->arguments = tuple_args;
    /// The argument list is registered both as `arguments` and as a child of the function node.
    tuple_function->children.push_back(tuple_function->arguments);
    return tuple_function;
}

/// Returns the columns of `in` (original order preserved) that have a non-null type
/// for which `isComparable()` is true. All other columns are dropped.
NamesAndTypesList filterComparable(const NamesAndTypesList & in)
{
    NamesAndTypesList comparable;
    for (const auto & column : in)
    {
        const bool has_comparable_type = column.type && column.type->isComparable();
        if (!has_comparable_type)
            continue;

        comparable.push_back(column);
    }
    return comparable;
}

/// Builds a KeyDescription for the key `tuple(col_1, ..., col_n)` where the columns are
/// exactly `comparable_cols`, in order. The same column list is used as the columns
/// description against which the key AST is resolved.
KeyDescription buildIdentityKey(const NamesAndTypesList & comparable_cols, ContextPtr context)
{
    Names key_column_names;
    key_column_names.reserve(comparable_cols.size());
    for (const auto & column : comparable_cols)
        key_column_names.push_back(column.name);

    const ASTPtr key_ast = makeIdentityKeyAST(key_column_names);
    const ColumnsDescription key_columns{comparable_cols};
    return KeyDescription::getKeyFromAST(key_ast, key_columns, context);
}

/// Zips `key.column_names` with `key.data_types` into a NamesAndTypesList, pairing
/// entries by position. Indexes `data_types` for every entry of `column_names`, so the
/// two are expected to have the same length.
NamesAndTypesList namesAndTypesFromKey(const KeyDescription & key)
{
    const auto & names = key.column_names;
    const auto & types = key.data_types;

    NamesAndTypesList result;
    for (size_t pos = 0, count = names.size(); pos != count; ++pos)
        result.emplace_back(names[pos], types[pos]);
    return result;
}

}

/// Prepares the pruner for one user filter.
///
/// - `identity_key` is a `tuple(...)` key over the comparable columns of `hybrid_columns`.
/// - `user_condition` is a KeyCondition built from `filter_dag` against that key.
/// - `useless` is set when the key has no columns or when
///   `user_condition.alwaysUnknownOrTrue()` holds; `canBePruned` then always returns false.
///
/// `context_` is read by the first two initializers and only moved from afterwards, so
/// the initializer order below must be kept.
HybridSegmentPruner::HybridSegmentPruner(
    const ActionsDAGWithInversionPushDown & filter_dag,
    const NamesAndTypesList & hybrid_columns,
    ContextPtr context_)
    : identity_key(buildIdentityKey(filterComparable(hybrid_columns), context_))
    , user_condition(
          filter_dag,
          context_,
          identity_key.column_names,
          identity_key.expression,
          /*single_point=*/ false)
    , context(std::move(context_))
    , useless(identity_key.column_names.empty() || user_condition.alwaysUnknownOrTrue())
{
}

bool HybridSegmentPruner::canBePruned(const ASTPtr & substituted_segment_predicate) const
try
{
if (useless || !substituted_segment_predicate)
return false;

auto segment_ast = substituted_segment_predicate->clone();
auto sample = namesAndTypesFromKey(identity_key);
auto syntax_result = TreeRewriter(context).analyze(segment_ast, sample);
auto segment_dag = ExpressionAnalyzer(segment_ast, syntax_result, context).getActionsDAG(true);
ActionsDAGWithInversionPushDown segment_filter(segment_dag.getOutputs().at(0), context);

KeyCondition segment_condition(
segment_filter, context,
identity_key.column_names, identity_key.expression,
/*single_point=*/ false);

Hyperrectangle rect;
rect.reserve(identity_key.column_names.size());

for (size_t i = 0; i < identity_key.column_names.size(); ++i)
{
Ranges col_ranges;
if (!segment_condition.extractPlainRangesForColumn(i, col_ranges))
{
rect.push_back(Range::createWholeUniverse());
continue;
}

if (col_ranges.empty())
return true;

if (col_ranges.size() != 1)
{
rect.push_back(Range::createWholeUniverse());
continue;
}

rect.push_back(col_ranges.front());
}

return !user_condition.checkInHyperrectangle(rect, identity_key.data_types).can_be_true;
}
catch (...)
{
return false;
}

}
47 changes: 47 additions & 0 deletions src/Storages/HybridSegmentPruner.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#pragma once

#include <Core/NamesAndTypes.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/IAST_fwd.h>
#include <Storages/KeyDescription.h>
#include <Storages/MergeTree/KeyCondition.h>

namespace DB
{

/// Hybrid-segment pruner, modeled after PartitionPruner / Iceberg::ManifestFilesPruner /
/// Paimon::PartitionPruner.
///
/// Build one KeyCondition over the user filter (PREWHERE+WHERE represented as an
/// ActionsDAG) using all comparable Hybrid columns as the key. For each segment, build
/// a second KeyCondition from its (already watermark-substituted) predicate AST and
/// use `KeyCondition::extractPlainRangesForColumn` to obtain a Hyperrectangle (fail-open
/// to whole-universe per column when extraction is ambiguous). Then ask
/// `KeyCondition::checkInHyperrectangle(rect, types).can_be_true`. The segment can be
/// pruned iff the answer is false.
///
/// canBePruned() returns true only when (user_filter AND segment_predicate) is provably
/// empty. It returns false in all other cases — unsupported segment shapes, missing user
/// filter, exceptions — so the caller falls back to scanning the segment normally.
class HybridSegmentPruner
{
public:
/// Builds the identity key over the comparable columns of `hybrid_columns` and a
/// KeyCondition for `filter_dag` against that key.
HybridSegmentPruner(
const ActionsDAGWithInversionPushDown & filter_dag,
const NamesAndTypesList & hybrid_columns,
ContextPtr context);

/// Returns true if the segment with the given (already substituted) predicate can be
/// skipped. Returns false for a null predicate, when the pruner is useless, and when
/// analysing the predicate throws.
bool canBePruned(const ASTPtr & substituted_segment_predicate) const;

/// True if the user filter is unrecognizable / always-true on the Hybrid key columns:
/// no segment can ever be pruned, so callers can short-circuit.
bool isUseless() const { return useless; }

private:
/// NOTE: the constructor initializes `user_condition` from `identity_key`, and members
/// are initialized in declaration order — keep `identity_key` first.

/// `tuple(...)` key over the comparable Hybrid columns.
KeyDescription identity_key;
/// Condition built from the user filter DAG against `identity_key`.
KeyCondition user_condition;
/// Context used when analysing segment predicates in canBePruned().
ContextPtr context;
/// Set by the constructor when the key has no columns or the user condition is
/// always unknown or true.
bool useless = false;
};

}
42 changes: 38 additions & 4 deletions src/Storages/MergeTree/KeyCondition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3226,10 +3226,18 @@ bool KeyCondition::extractPlainRanges(Ranges & ranges) const
if (key_columns.size() != 1)
return false;

return extractPlainRangesForColumn(0, ranges);
}

bool KeyCondition::extractPlainRangesForColumn(size_t column_index, Ranges & ranges) const
{
if (column_index >= key_columns.size())
return false;

if (hasMonotonicFunctionsChain())
return false;

/// All Ranges in rpn_stack is plain.
/// All Ranges in rpn_stack are plain.
std::stack<PlainRanges> rpn_stack;

for (const auto & element : rpn)
Expand Down Expand Up @@ -3280,14 +3288,31 @@ bool KeyCondition::extractPlainRanges(Ranges & ranges) const
{
if (element.function == RPNElement::FUNCTION_IN_RANGE)
{
rpn_stack.push(PlainRanges(element.range));
if (element.getKeyColumn() != column_index)
rpn_stack.push(PlainRanges::makeUniverse());
else
rpn_stack.push(PlainRanges(element.range));
}
else if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
{
rpn_stack.push(PlainRanges(element.range.invertRange()));
if (element.getKeyColumn() != column_index)
rpn_stack.push(PlainRanges::makeUniverse());
else
rpn_stack.push(PlainRanges(element.range.invertRange()));
}
else if (element.function == RPNElement::FUNCTION_IN_SET)
{
/// Only single-column set atoms are supported. For multi-column tuple-IN, bail out;
/// the caller falls back to "can't prune" (see `HybridSegmentPruner::canBePruned`).
const auto & mapping = element.set_index->getIndexesMapping();
if (mapping.size() != 1)
return false;
if (mapping[0].key_index != column_index)
{
rpn_stack.push(PlainRanges::makeUniverse());
continue;
}

if (element.set_index->hasMonotonicFunctionsChain())
return false;

Expand All @@ -3313,6 +3338,15 @@ bool KeyCondition::extractPlainRanges(Ranges & ranges) const
}
else if (element.function == RPNElement::FUNCTION_NOT_IN_SET)
{
const auto & mapping = element.set_index->getIndexesMapping();
if (mapping.size() != 1)
return false;
if (mapping[0].key_index != column_index)
{
rpn_stack.push(PlainRanges::makeUniverse());
continue;
}

if (element.set_index->hasMonotonicFunctionsChain())
return false;

Expand Down Expand Up @@ -3379,7 +3413,7 @@ bool KeyCondition::extractPlainRanges(Ranges & ranges) const
}

if (rpn_stack.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::extractPlainRanges");
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::extractPlainRangesForColumn");

ranges = std::move(rpn_stack.top().ranges);
return true;
Expand Down
5 changes: 5 additions & 0 deletions src/Storages/MergeTree/KeyCondition.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ class KeyCondition
/// TODO handle the cases when generate RPN.
bool extractPlainRanges(Ranges & ranges) const;

/// Same stack algorithm as extractPlainRanges, but for a multi-column key: logical ops apply
/// as usual, while atoms that constrain other key columns become the universe for `column_index`.
/// Returns false if the RPN contains unsupported atoms for this extraction (same as extractPlainRanges).
bool extractPlainRangesForColumn(size_t column_index, Ranges & ranges) const;

/// The expression is stored as Reverse Polish Notation.
struct RPNElement
{
Expand Down
Loading
Loading