diff --git a/native/Cargo.lock b/native/Cargo.lock index bdd221e95f..323a53bdf7 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -359,6 +359,7 @@ dependencies = [ "arrow-select", "flatbuffers", "lz4_flex", + "zstd", ] [[package]] @@ -1170,9 +1171,9 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" dependencies = [ "hybrid-array", ] @@ -1883,14 +1884,13 @@ dependencies = [ [[package]] name = "datafusion" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" +checksum = "997a31e15872606a49478e670c58302094c97cb96abb0a7d60720f8e92170040" dependencies = [ "arrow", "arrow-schema", "async-trait", - "bytes", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -1918,13 +1918,12 @@ dependencies = [ "datafusion-session", "datafusion-sql", "futures", + "indexmap 2.14.0", "itertools 0.14.0", "log", "object_store", "parking_lot", "parquet", - "rand 0.9.4", - "regex", "sqlparser", "tempfile", "tokio", @@ -1934,9 +1933,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" +checksum = "f7dd61161508f8f5fa1107774ea687bd753c22d83a32eebf963549f89de14139" dependencies = [ "arrow", "async-trait", @@ -1959,9 +1958,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" +checksum = "897c70f871277f9ce99aa38347be0d679bbe3e617156c4d2a8378cec8a2a0891" dependencies = [ "arrow", "async-trait", @@ -2157,16 +2156,17 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" +checksum = "121c9ded5d87d9172319e006f2afdb9928d72dbacd6a90a458d8acb1e3b43a65" dependencies = [ - "ahash", "arrow", "arrow-ipc", + "arrow-schema", "chrono", + "foldhash 0.2.0", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "hex", "indexmap 2.14.0", "itertools 0.14.0", @@ -2174,17 +2174,17 @@ dependencies = [ "log", "object_store", "parquet", - "paste", "sqlparser", "tokio", + "uuid", "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" +checksum = "981b9dae74f78ee3d9f714fb49b01919eab975461b56149510c3ba9ea11287d1" dependencies = [ "futures", "log", @@ -2193,9 +2193,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" +checksum = "ffd7d295b2ec7c00d8a56562f41ed41062cf0af75549ed891c12a0a09eddfefe" dependencies = [ "arrow", "async-compression", @@ -2219,6 +2219,7 @@ dependencies = [ "liblzma", "log", "object_store", + "parking_lot", "rand 0.9.4", "tokio", "tokio-util", @@ -2228,9 +2229,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" +checksum = "552b0b3f342f7ec41b3fbd70f6339dc82a30cfd0349e7f280e7852528085349f" dependencies = [ "arrow", "arrow-ipc", @@ -2252,9 +2253,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" +checksum = "68850aa426b897e879c8b87e512ea8124f1d0a2869a4e51808ddaaddf1bc0ada" dependencies = [ "arrow", "async-trait", @@ -2275,9 +2276,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" +checksum = "402f93242ae08ef99139ee2c528a49d087efe88d5c7b2c3ff5480855a40ce54f" dependencies = [ "arrow", "async-trait", @@ -2292,16 +2293,15 @@ dependencies = [ "datafusion-session", "futures", "object_store", - "serde_json", "tokio", "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a8e0365e0e08e8ff94d912f0ababcf9065a1a304018ba90b1fc83c855b4997" +checksum = "ffd2499c1bee0eeccf6a57156105700eeeb17bc701899ac719183c4e74231450" dependencies = [ "arrow", "async-trait", @@ -2311,6 +2311,7 @@ dependencies = [ "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", @@ -2329,20 +2330,19 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" +checksum = "cb9e7e5d11130c48c8bd4e80c79a9772dd28ce6dc330baca9246205d245b9e2e" [[package]] name = "datafusion-execution" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" +checksum = "37a8643ab852eb68864e1b72ae789e8066282dce48eea6347ffb0aee33d1ccc0" dependencies = [ "arrow", "arrow-buffer", "async-trait", - "chrono", "dashmap", "datafusion-common", "datafusion-expr", @@ -2359,11 +2359,12 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" +checksum = "6932f4d71eed9c8d9341476a2b845aadfabde5495d08dbcd8fc23881f49fa7a0" dependencies = [ "arrow", + "arrow-schema", "async-trait", "chrono", "datafusion-common", @@ -2374,29 +2375,27 @@ dependencies = [ "datafusion-physical-expr-common", "indexmap 2.14.0", "itertools 0.14.0", - "paste", "serde_json", "sqlparser", ] [[package]] name = "datafusion-expr-common" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" +checksum = "0225491839a31b1f7d2cb8092c2d50792e2fe1c1724e4e6d08e011f5feaf4ed2" dependencies = [ "arrow", "datafusion-common", "indexmap 2.14.0", "itertools 0.14.0", - "paste", ] [[package]] name = "datafusion-functions" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" +checksum = "14872c47bfc3d21e53ec82f57074e6987a15941c1e2f43cde4ac6ae2746634e3" dependencies = [ "arrow", "arrow-buffer", @@ -2411,26 +2410,25 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", + "datafusion-physical-expr-common", "hex", "itertools 0.14.0", "log", - "md-5 0.10.6", + "md-5 0.11.0", "memchr", "num-traits", "rand 0.9.4", "regex", - "sha2 0.10.9", - "unicode-segmentation", + "sha2 0.11.0", "uuid", ] [[package]] name = "datafusion-functions-aggregate" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" +checksum = "75a2ca14e1b609be21e657e2d3130b2f446456b08393b377bb721a33952d2e09" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-doc", @@ -2440,19 +2438,18 @@ dependencies = [ "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", + "foldhash 0.2.0", "half", "log", "num-traits", - "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" +checksum = "1ece74ba09092d2ef9c9b54a38445450aea292a1f8b04faf531936b723a24b3c" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-expr-common", @@ -2461,9 +2458,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" +checksum = "3f3e3f9ee8ca59bf70518802107de6f1b88a9509efdc629fadc5de9d6b2d5ef5" dependencies = [ "arrow", "arrow-ord", @@ -2477,34 +2474,34 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "itertools 0.14.0", "itoa", "log", - "paste", + "memchr", ] [[package]] name = "datafusion-functions-table" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" +checksum = "89161dffc22cf2b50f9f4b1bee83b5221d3b4ed7c2e37fd7aa2b22a5297b3a26" dependencies = [ "arrow", "async-trait", "datafusion-catalog", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", "parking_lot", - "paste", ] [[package]] name = "datafusion-functions-window" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" +checksum = "d7339345b226b3874037708bf5023ba1c2de705128f8457a095aae5ae9cb9c78" dependencies = [ "arrow", "datafusion-common", @@ -2515,14 +2512,13 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "log", - "paste", ] [[package]] name = "datafusion-functions-window-common" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" +checksum = "fa84836dc2392df6f43d6a29d37fb56a8ebdc8b3f4e10ae8dc15861fd20278fb" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2530,9 +2526,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" +checksum = "587164e03ad68732aa9e7bfe5686e3f25970d4c64fd4bd80790749840892dae5" dependencies = [ "datafusion-doc", "quote", @@ -2541,9 +2537,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" +checksum = "77f20e8cf9e8654d92f4c16b24c487353ee5bf153ffc12d5772cd399ab8cd281" dependencies = [ "arrow", "chrono", @@ -2560,11 +2556,10 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" +checksum = "f015a4a82f6f7ff7e1d8d4bf3870a936752fa38b17705dfcc14adef95aa8922c" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-expr", @@ -2572,20 +2567,19 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap 2.14.0", "itertools 0.14.0", "parking_lot", - "paste", "petgraph", "tokio", ] [[package]] name = "datafusion-physical-expr-adapter" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" +checksum = "51e6ffff8acdfe54e0ea15ccf38115c4a9184433b0439f42907637928d00a235" dependencies = [ "arrow", "datafusion-common", @@ -2598,26 +2592,26 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" +checksum = "7967a3e171c6a4bf09474b3f7a14f1a3db13ed1714ba12156f33fcce2bba54e8" dependencies = [ - "ahash", "arrow", "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap 2.14.0", "itertools 0.14.0", "parking_lot", + "pin-project", ] [[package]] name = "datafusion-physical-optimizer" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" +checksum = "59ff803e2a96054cb6d83f35f9e60fd4f42eac515e1932bd1b2dbc91d5fcbf36" dependencies = [ "arrow", "datafusion-common", @@ -2633,12 +2627,13 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" +checksum = "776ee54d47d15bdb126452f9ca17b03761e3b004682914beaedd3f86eb507fbc" dependencies = [ - "ahash", "arrow", + "arrow-data", + "arrow-ipc", "arrow-ord", "arrow-schema", "async-trait", @@ -2653,7 +2648,7 @@ dependencies = [ "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap 2.14.0", "itertools 0.14.0", "log", @@ -2665,9 +2660,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" +checksum = "d5fb9e5774660aa69c3ba93c610f175f75b65cb8c3776edb3626de8f3a4f4ee3" dependencies = [ "arrow", "datafusion-common", @@ -2676,15 +2671,14 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools 0.14.0", "log", ] [[package]] name = "datafusion-session" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" +checksum = "15ce715fa2a61f4623cc234bcc14a3ef6a91f189128d5b14b468a6a17cdfc417" dependencies = [ "async-trait", "datafusion-common", @@ -2696,9 +2690,9 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e059dcf8544da0d6598d0235be3cc29c209094a5976b2e4822e4a2cf91c2b5c5" +checksum = "390bb0bf37cb2b95ffd65eacd66f60df50793d1f94097799e416f39477a51957" dependencies = [ "arrow", "bigdecimal", @@ -2711,21 +2705,24 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-functions-nested", "log", + "num-traits", "percent-encoding", "rand 0.9.4", "serde_json", - "sha1 0.10.6", - "sha2 0.10.9", + "sha1 0.11.0", + "sha2 0.11.0", + "twox-hash", "url", ] [[package]] name = "datafusion-sql" -version = "53.1.0" +version = "54.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" +checksum = "6094ad36a3ed6d7ac87b20b479b2d0b118250f66cf997603829fdc65b44a7099" dependencies = [ "arrow", "bigdecimal", @@ -2846,7 +2843,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ - "block-buffer 0.12.0", + "block-buffer 0.12.1", "const-oid 0.10.2", "crypto-common 0.2.2", "ctutils", @@ -3330,21 +3327,15 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.1" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" dependencies = [ "allocator-api2", "equivalent", "foldhash 0.2.0", ] -[[package]] -name = "hashbrown" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" - [[package]] name = "hdfs-sys" version = "0.3.0" @@ -6416,9 +6407,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.61.0" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" +checksum = "13c6d1b651dc4edf07eead2a0c6c78016ce971bc2c10da5266861b13f25e7cec" dependencies = [ "log", "sqlparser_derive", diff --git a/native/Cargo.toml b/native/Cargo.toml index d764350e02..1babae7036 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -38,10 +38,10 @@ arrow = { version = "58.3.0", features = ["prettyprint", "ffi", "chrono-tz"] } async-trait = { version = "0.1" } bytes = { version = "1.11.1" } parquet = { version = "58.3.0", default-features = false, features = ["experimental"] } -datafusion = { version = "53.1.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } -datafusion-datasource = { version = "53.1.0" } -datafusion-physical-expr-adapter = { version = "53.1.0" } -datafusion-spark = { version = "53.1.0", features = ["core"] } +datafusion = { version = "54.0.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } +datafusion-datasource = { version = "54.0.0" } +datafusion-physical-expr-adapter = { version = "54.0.0" } +datafusion-spark = { version = "54.0.0", features = ["core"] } datafusion-comet-spark-expr = { path = "spark-expr" } datafusion-comet-common = { path = "common" } datafusion-comet-jni-bridge = { path = "jni-bridge" } diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index d9c45a08a3..4e7e278944 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -92,7 +92,7 @@ jni = { version = "0.22.4", features = ["invocation"] } lazy_static = "1.4" assertables = "10" hex = "0.4.3" -datafusion-functions-nested = { version = "53.1.0" } +datafusion-functions-nested = { version = "54.0.0" } [features] backtrace = ["datafusion/backtrace"] diff --git a/native/core/src/debug/debug_batch_stream.rs b/native/core/src/debug/debug_batch_stream.rs index b59b6a36f7..0c5289b198 100644 --- a/native/core/src/debug/debug_batch_stream.rs +++ b/native/core/src/debug/debug_batch_stream.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::fmt; use std::hash::{Hash, Hasher}; use std::sync::Arc; @@ -82,9 +81,6 @@ impl datafusion::physical_plan::ExecutionPlan for DebugExecutionDataStream { fn name(&self) -> &str { "DebugExecutionDataStream" } - fn as_any(&self) -> &dyn std::any::Any { - self - } fn properties(&self) -> &Arc { self.inner.properties() } @@ -155,9 +151,6 @@ impl Hash for DebugExecutionDataPhyExpr { } impl PhysicalExpr for DebugExecutionDataPhyExpr { - fn as_any(&self) -> &dyn Any { - self - } fn data_type(&self, input_schema: &Schema) -> Result { self.inner.data_type(input_schema) } diff --git a/native/core/src/execution/expressions/arithmetic.rs b/native/core/src/execution/expressions/arithmetic.rs index 320532d773..8d4c59a010 100644 --- a/native/core/src/execution/expressions/arithmetic.rs +++ b/native/core/src/execution/expressions/arithmetic.rs @@ -77,10 +77,6 @@ impl Hash for CheckedBinaryExpr { } impl PhysicalExpr for CheckedBinaryExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { self.child.fmt_sql(f) } diff --git a/native/core/src/execution/expressions/list_positions.rs b/native/core/src/execution/expressions/list_positions.rs index 117aab4d97..11643db4ce 100644 --- a/native/core/src/execution/expressions/list_positions.rs +++ b/native/core/src/execution/expressions/list_positions.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::Arc; @@ -68,10 +67,6 @@ impl Hash for ListPositionsExpr { } impl PhysicalExpr for ListPositionsExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/core/src/execution/expressions/strings.rs b/native/core/src/execution/expressions/strings.rs index 7219395963..4a7c44cc3e 100644 --- a/native/core/src/execution/expressions/strings.rs +++ b/native/core/src/execution/expressions/strings.rs @@ -91,7 +91,7 @@ impl ExpressionBuilder for RlikeBuilder { let left = planner.create_expr(expr.left.as_ref().unwrap(), Arc::clone(&input_schema))?; let right = planner.create_expr(expr.right.as_ref().unwrap(), input_schema)?; - match right.as_any().downcast_ref::().unwrap().value() { + match right.downcast_ref::().unwrap().value() { ScalarValue::Utf8(Some(pattern)) => Ok(Arc::new(RLike::try_new(left, pattern)?)), _ => Err(ExecutionError::GeneralError( "RLike only supports scalar patterns".to_string(), diff --git a/native/core/src/execution/expressions/subquery.rs b/native/core/src/execution/expressions/subquery.rs index 9272ede60c..fc7b8104d2 100644 --- a/native/core/src/execution/expressions/subquery.rs +++ b/native/core/src/execution/expressions/subquery.rs @@ -29,7 +29,6 @@ use jni::{ sys::{jboolean, jbyte, jint, jlong, jshort}, }; use std::{ - any::Any, fmt::{Display, Formatter}, hash::Hash, sync::Arc, @@ -63,10 +62,6 @@ impl Display for Subquery { } impl PhysicalExpr for Subquery { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/core/src/execution/memory_pools/fair_pool.rs b/native/core/src/execution/memory_pools/fair_pool.rs index e4a7ceab54..347c3d8ef6 100644 --- a/native/core/src/execution/memory_pools/fair_pool.rs +++ b/native/core/src/execution/memory_pools/fair_pool.rs @@ -16,7 +16,7 @@ // under the License. use std::{ - fmt::{Debug, Formatter, Result as FmtResult}, + fmt::{Debug, Display, Formatter, Result as FmtResult}, sync::Arc, }; @@ -83,10 +83,25 @@ impl CometFairMemoryPool { } } +impl Display for CometFairMemoryPool { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + let state = self.state.lock(); + write!( + f, + "CometFairMemoryPool(pool_size={}, used={}, num={})", + self.pool_size, state.used, state.num + ) + } +} + unsafe impl Send for CometFairMemoryPool {} unsafe impl Sync for CometFairMemoryPool {} impl MemoryPool for CometFairMemoryPool { + fn name(&self) -> &str { + "CometFairMemoryPool" + } + fn register(&self, _: &MemoryConsumer) { let mut state = self.state.lock(); state.num = state diff --git a/native/core/src/execution/memory_pools/logging_pool.rs b/native/core/src/execution/memory_pools/logging_pool.rs index c23672d01a..4e7b68a728 100644 --- a/native/core/src/execution/memory_pools/logging_pool.rs +++ b/native/core/src/execution/memory_pools/logging_pool.rs @@ -19,6 +19,7 @@ use datafusion::execution::memory_pool::{ MemoryConsumer, MemoryLimit, MemoryPool, MemoryReservation, }; use log::{info, warn}; +use std::fmt; use std::sync::Arc; #[derive(Debug)] @@ -27,6 +28,12 @@ pub(crate) struct LoggingMemoryPool { pool: Arc, } +impl fmt::Display for LoggingMemoryPool { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "LoggingMemoryPool(task={})", self.task_attempt_id) + } +} + impl LoggingMemoryPool { pub fn new(task_attempt_id: u64, pool: Arc) -> Self { Self { @@ -37,6 +44,10 @@ impl LoggingMemoryPool { } impl MemoryPool for LoggingMemoryPool { + fn name(&self) -> &str { + "LoggingMemoryPool" + } + fn register(&self, consumer: &MemoryConsumer) { info!( "[Task {}] MemoryPool[{}].register()", diff --git a/native/core/src/execution/memory_pools/unified_pool.rs b/native/core/src/execution/memory_pools/unified_pool.rs index f34418ee94..e72c734d04 100644 --- a/native/core/src/execution/memory_pools/unified_pool.rs +++ b/native/core/src/execution/memory_pools/unified_pool.rs @@ -16,7 +16,7 @@ // under the License. use std::{ - fmt::{Debug, Formatter, Result as FmtResult}, + fmt::{Debug, Display, Formatter, Result as FmtResult}, sync::{ atomic::{AtomicUsize, Ordering::Relaxed}, Arc, @@ -90,10 +90,24 @@ impl Drop for CometUnifiedMemoryPool { } } +impl Display for CometUnifiedMemoryPool { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + write!( + f, + "CometUnifiedMemoryPool(used={})", + self.used.load(Relaxed) + ) + } +} + unsafe impl Send for CometUnifiedMemoryPool {} unsafe impl Sync for CometUnifiedMemoryPool {} impl MemoryPool for CometUnifiedMemoryPool { + fn name(&self) -> &str { + "CometUnifiedMemoryPool" + } + fn grow(&self, reservation: &MemoryReservation, additional: usize) { self.try_grow(reservation, additional).unwrap(); } diff --git a/native/core/src/execution/merge_as_partial.rs b/native/core/src/execution/merge_as_partial.rs index 1c15ce8329..5ea26115bf 100644 --- a/native/core/src/execution/merge_as_partial.rs +++ b/native/core/src/execution/merge_as_partial.rs @@ -26,7 +26,6 @@ //! outputs state) but redirects `update_batch` calls to `merge_batch`, giving merge //! semantics with state output. -use std::any::Any; use std::fmt::Debug; use std::hash::{Hash, Hasher}; @@ -100,10 +99,6 @@ impl MergeAsPartialUDF { } impl AggregateUDFImpl for MergeAsPartialUDF { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { &self.name } diff --git a/native/core/src/execution/metrics/utils.rs b/native/core/src/execution/metrics/utils.rs index 94cc5892a4..a94cfdeb69 100644 --- a/native/core/src/execution/metrics/utils.rs +++ b/native/core/src/execution/metrics/utils.rs @@ -71,14 +71,19 @@ pub(crate) fn to_native_metric_node( children: Vec::with_capacity(children.len()), }; - if let Some(metrics) = node_metrics { - for m in metrics.iter() { - let value = m.value(); - native_metric_node - .metrics - .insert(value.name().to_string(), value.as_usize() as i64); - } - } + // Aggregate metrics by name using DataFusion's aggregate_by_name(), which + // correctly handles duplicate metric names (e.g. BaselineMetrics registered + // by both FileStream and ParquetMorselizer on the same ExecutionPlanMetricsSet). + // The additional_native_plans branch below already does this. + node_metrics + .unwrap_or_default() + .aggregate_by_name() + .iter() + .map(|m| m.value()) + .map(|m| (m.name(), m.as_usize() as i64)) + .for_each(|(name, value)| { + native_metric_node.metrics.insert(name.to_string(), value); + }); for child_plan in children { let child_node = to_native_metric_node(child_plan)?; diff --git a/native/core/src/execution/operators/expand.rs b/native/core/src/execution/operators/expand.rs index e06fab23ec..8edc8c4d50 100644 --- a/native/core/src/execution/operators/expand.rs +++ b/native/core/src/execution/operators/expand.rs @@ -29,7 +29,6 @@ use datafusion::{ }; use futures::{Stream, StreamExt}; use std::{ - any::Any, pin::Pin, sync::Arc, task::{Context, Poll}, @@ -91,10 +90,6 @@ impl DisplayAs for ExpandExec { } impl ExecutionPlan for ExpandExec { - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } diff --git a/native/core/src/execution/operators/iceberg_scan.rs b/native/core/src/execution/operators/iceberg_scan.rs index 0dbfff457b..713b4089b0 100644 --- a/native/core/src/execution/operators/iceberg_scan.rs +++ b/native/core/src/execution/operators/iceberg_scan.rs @@ -17,7 +17,6 @@ //! Native Iceberg table scan operator using iceberg-rust -use std::any::Any; use std::collections::HashMap; use std::fmt; use std::pin::Pin; @@ -123,10 +122,6 @@ impl ExecutionPlan for IcebergScanExec { "IcebergScanExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { Arc::clone(&self.output_schema) } diff --git a/native/core/src/execution/operators/parquet_writer.rs b/native/core/src/execution/operators/parquet_writer.rs index 8ba79098d4..f1168c4a57 100644 --- a/native/core/src/execution/operators/parquet_writer.rs +++ b/native/core/src/execution/operators/parquet_writer.rs @@ -18,7 +18,6 @@ //! Parquet writer operator for writing RecordBatches to Parquet files use std::{ - any::Any, collections::HashMap, fmt, fmt::{Debug, Formatter}, @@ -404,10 +403,6 @@ impl DisplayAs for ParquetWriterExec { #[async_trait] impl ExecutionPlan for ParquetWriterExec { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "ParquetWriterExec" } diff --git a/native/core/src/execution/operators/scan.rs b/native/core/src/execution/operators/scan.rs index 8c8f3b03ba..4b9d76da28 100644 --- a/native/core/src/execution/operators/scan.rs +++ b/native/core/src/execution/operators/scan.rs @@ -33,7 +33,6 @@ use datafusion::{ use futures::Stream; use itertools::Itertools; use std::{ - any::Any, pin::Pin, sync::{Arc, Mutex}, task::{Context, Poll}, @@ -181,10 +180,6 @@ fn schema_from_data_types(data_types: &[DataType]) -> SchemaRef { } impl ExecutionPlan for ScanExec { - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } diff --git a/native/core/src/execution/operators/shuffle_scan.rs b/native/core/src/execution/operators/shuffle_scan.rs index 92c4dc8780..f3209b0c1b 100644 --- a/native/core/src/execution/operators/shuffle_scan.rs +++ b/native/core/src/execution/operators/shuffle_scan.rs @@ -37,7 +37,6 @@ use datafusion::{ use futures::Stream; use jni::objects::{Global, JByteBuffer, JObject}; use std::{ - any::Any, pin::Pin, sync::{Arc, Mutex}, task::{Context, Poll}, @@ -221,10 +220,6 @@ fn schema_from_data_types(data_types: &[DataType]) -> SchemaRef { } impl ExecutionPlan for ShuffleScanExec { - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index c07a92d700..df690cb118 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -269,7 +269,6 @@ impl PhysicalPlanner { let literal = self.create_expr(partition_value, Arc::::clone(&empty_schema))?; literal - .as_any() .downcast_ref::() .ok_or_else(|| { GeneralError("Expected literal of partition value".to_string()) @@ -463,11 +462,7 @@ impl PhysicalPlanner { // WideDecimalBinaryExpr already handles overflow — skip redundant check // but only if its output type matches CheckOverflow's declared type - if child - .as_any() - .downcast_ref::() - .is_some() - { + if child.downcast_ref::().is_some() { let child_type = child.data_type(&input_schema)?; if child_type == data_type { return Ok(child); @@ -476,7 +471,7 @@ impl PhysicalPlanner { // Fuse Cast(Decimal128→Decimal128) + CheckOverflow into single rescale+check // Only fuse when the Cast target type matches the CheckOverflow output type - if let Some(cast) = child.as_any().downcast_ref::() { + if let Some(cast) = child.downcast_ref::() { if let ( DataType::Decimal128(p_out, s_out), Ok(DataType::Decimal128(_p_in, s_in)), @@ -1402,12 +1397,10 @@ impl PhysicalPlanner { .iter() .map(|expr| { let literal = self.create_expr(expr, Arc::clone(&required_schema))?; - let df_literal = literal - .as_any() - .downcast_ref::() - .ok_or_else(|| { - GeneralError("Expected literal of default value.".to_string()) - })?; + let df_literal = + literal.downcast_ref::().ok_or_else(|| { + GeneralError("Expected literal of default value.".to_string()) + })?; Ok(df_literal.value().clone()) }) .collect(); @@ -1990,7 +1983,7 @@ impl PhysicalPlanner { hash_join.as_ref().swap_inputs(PartitionMode::Partitioned)?; let mut additional_native_plans = vec![]; - if swapped_hash_join.as_any().is::() { + if swapped_hash_join.is::() { // a projection was added to the hash join additional_native_plans.push(Arc::clone(swapped_hash_join.children()[0])); } @@ -2850,8 +2843,7 @@ impl PhysicalPlanner { &boundary_row.partition_bounds[col_idx], Arc::clone(&input_schema), )?; - let literal_expr = - expr.as_any().downcast_ref::().expect("Literal"); + let literal_expr = expr.downcast_ref::().expect("Literal"); col_values.push(literal_expr.value().clone()); } } @@ -2961,12 +2953,7 @@ impl PhysicalPlanner { // TODO this should try and find scalar let arguments = args .iter() - .map(|e| { - e.as_ref() - .as_any() - .downcast_ref::() - .map(|lit| lit.value()) - }) + .map(|e| e.as_ref().downcast_ref::().map(|lit| lit.value())) .collect::>(); let args = ReturnFieldArgs { @@ -3070,7 +3057,7 @@ fn expr_to_columns( expr.apply(&mut |expr: &Arc| { Ok({ - if let Some(column) = expr.as_any().downcast_ref::() { + if let Some(column) = expr.downcast_ref::() { if column.index() > left_field_len + right_field_len { return Err(DataFusionError::Internal(format!( "Column index {} out of range", @@ -3121,7 +3108,7 @@ impl TreeNodeRewriter for JoinFilterRewriter<'_> { type Node = Arc; fn f_down(&mut self, node: Self::Node) -> datafusion::common::Result> { - if let Some(column) = node.as_any().downcast_ref::() { + if let Some(column) = node.downcast_ref::() { if column.index() < self.left_field_len { // left side let new_index = self diff --git a/native/core/src/parquet/cast_column.rs b/native/core/src/parquet/cast_column.rs index 67558b5266..1cc928d1d5 100644 --- a/native/core/src/parquet/cast_column.rs +++ b/native/core/src/parquet/cast_column.rs @@ -31,7 +31,6 @@ use datafusion::common::ScalarValue; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::{ - any::Any, fmt::{self, Display}, hash::Hash, sync::Arc, @@ -250,10 +249,6 @@ impl Display for CometCastColumnExpr { } impl PhysicalExpr for CometCastColumnExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, _input_schema: &Schema) -> DataFusionResult { Ok(self.target_field.data_type().clone()) } diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs index 9a348e5b62..1e5d0c5ac0 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -33,7 +33,6 @@ use datafusion_physical_expr_adapter::{ PhysicalExprAdapterFactory, }; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; -use std::any::Any; use std::collections::HashMap; use std::fmt::{self, Display}; use std::hash::{Hash, Hasher}; @@ -454,7 +453,7 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { // Walk the expression tree to find Column references let mut duplicate_err: Option = None; let _ = Arc::::clone(&expr).transform(|e| { - if let Some(col) = e.as_any().downcast_ref::() { + if let Some(col) = e.downcast_ref::() { if let Some((req, matched)) = check_column_duplicate(col.name(), orig_physical) { duplicate_err = Some(DataFusionError::External(Box::new( @@ -504,7 +503,7 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter { // the actual parquet stream schema, which uses the original physical names. let expr = if let Some(name_map) = &self.logical_to_physical_names { expr.transform(|e| { - if let Some(col) = e.as_any().downcast_ref::() { + if let Some(col) = e.downcast_ref::() { if let Some(physical_name) = name_map.get(col.name()) { return Ok(Transformed::yes(Arc::new(Column::new( physical_name, @@ -533,7 +532,7 @@ impl SparkPhysicalExprAdapter { expr: Arc, ) -> DataFusionResult> { expr.transform(|e| { - if let Some(column) = e.as_any().downcast_ref::() { + if let Some(column) = e.downcast_ref::() { let col_name = column.name(); // Resolve fields by name because this is the fallback path @@ -623,21 +622,29 @@ impl SparkPhysicalExprAdapter { .data() } - /// Replace CastColumnExpr (DataFusion's cast) with Spark's Cast expression. + /// Replace CastExpr (DataFusion's cast) with Spark's Cast expression. fn replace_with_spark_cast( &self, expr: Arc, ) -> DataFusionResult>> { - // Check for CastColumnExpr and replace with spark_expr::Cast - // CastColumnExpr is in datafusion_physical_expr::expressions - if let Some(cast) = expr - .as_any() - .downcast_ref::() + // Check for CastExpr and replace with spark_expr::Cast + if let Some(cast) = expr.downcast_ref::() { let child = Arc::clone(cast.expr()); - let physical_type = cast.input_field().data_type(); let target_type = cast.target_field().data_type(); + // Derive input field from the child Column expression and the physical schema. + // DF main removed CastColumnExpr in favor of CastExpr, so we recover the input + // field from the child Column rather than calling cast.input_field(). + let input_field = if let Some(col) = child.downcast_ref::() { + Arc::new(self.physical_file_schema.field(col.index()).clone()) + } else { + // Fallback: synthesize a field from the target field name and child data type + let child_type = cast.expr().data_type(&self.physical_file_schema)?; + Arc::new(Field::new(cast.target_field().name(), child_type, true)) + }; + let physical_type = input_field.data_type(); + // Reject reading a string/binary Parquet column as anything else. Spark's // `ParquetVectorUpdaterFactory.getUpdater` BINARY case allows StringType / // BinaryType, or DecimalType only when the column carries a @@ -647,7 +654,7 @@ impl SparkPhysicalExprAdapter { // See #4088 and #4351. if is_string_or_binary(physical_type) && !is_string_or_binary(target_type) { return Err(parquet_schema_convert_err( - cast.input_field().name(), + input_field.name(), physical_type, target_type, )); @@ -672,7 +679,7 @@ impl SparkPhysicalExprAdapter { let rejection = reject_on_non_empty_expr( child, cast.target_field(), - cast.input_field().name(), + input_field.name(), physical_type, target_type, ); @@ -692,7 +699,7 @@ impl SparkPhysicalExprAdapter { let dst_int_precision = i32::from(*dst_p) - i32::from(*dst_s); if dst_s < src_s || dst_int_precision < src_int_precision { return Err(parquet_schema_convert_err( - cast.input_field().name(), + input_field.name(), physical_type, target_type, )); @@ -716,7 +723,7 @@ impl SparkPhysicalExprAdapter { let dst_int_precision = i32::from(dst_p) - i32::from(dst_s); if dst_int_precision < min_int_precision { return Err(parquet_schema_convert_err( - cast.input_field().name(), + input_field.name(), physical_type, target_type, )); @@ -740,7 +747,7 @@ impl SparkPhysicalExprAdapter { let rejection = reject_on_non_empty_expr( Arc::clone(&child), cast.target_field(), - cast.input_field().name(), + input_field.name(), physical_type, target_type, ); @@ -800,7 +807,7 @@ impl SparkPhysicalExprAdapter { let rejection = reject_on_non_empty_expr( child, cast.target_field(), - cast.input_field().name(), + input_field.name(), physical_type, target_type, ); @@ -820,7 +827,7 @@ impl SparkPhysicalExprAdapter { }; if is_complex(physical_type) != is_complex(target_type) { return Err(parquet_schema_convert_err( - cast.input_field().name(), + input_field.name(), physical_type, target_type, )); @@ -842,7 +849,7 @@ impl SparkPhysicalExprAdapter { let comet_cast: Arc = Arc::new( CometCastColumnExpr::new( child, - Arc::clone(cast.input_field()), + input_field, Arc::clone(cast.target_field()), None, ) @@ -996,10 +1003,6 @@ impl Display for RejectOnNonEmpty { } impl PhysicalExpr for RejectOnNonEmpty { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, _input_schema: &Schema) -> DataFusionResult { Ok(self.target_field.data_type().clone()) } diff --git a/native/shuffle/src/schema_align.rs b/native/shuffle/src/schema_align.rs index 56042cfb0b..b5566becef 100644 --- a/native/shuffle/src/schema_align.rs +++ b/native/shuffle/src/schema_align.rs @@ -50,7 +50,6 @@ use datafusion::{ }; use futures::{Stream, StreamExt}; use std::{ - any::Any, collections::HashSet, pin::Pin, sync::{Arc, Mutex, OnceLock}, @@ -182,10 +181,6 @@ impl DisplayAs for SchemaAlignExec { } impl ExecutionPlan for SchemaAlignExec { - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { Arc::clone(&self.target_schema) } diff --git a/native/shuffle/src/shuffle_writer.rs b/native/shuffle/src/shuffle_writer.rs index 7b6f4ca7e2..756e753b3c 100644 --- a/native/shuffle/src/shuffle_writer.rs +++ b/native/shuffle/src/shuffle_writer.rs @@ -40,7 +40,6 @@ use datafusion::{ }; use futures::{StreamExt, TryStreamExt}; use std::{ - any::Any, fmt, fmt::{Debug, Formatter}, sync::Arc, @@ -119,11 +118,6 @@ impl DisplayAs for ShuffleWriterExec { #[async_trait] impl ExecutionPlan for ShuffleWriterExec { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "ShuffleWriterExec" } @@ -343,6 +337,7 @@ mod test { } #[tokio::test] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `ZSTD_createCCtx` async fn shuffle_partitioner_memory() { let batch = create_batch(900); assert_eq!(8316, batch.get_array_memory_size()); // Not stable across Arrow versions diff --git a/native/spark-expr/src/agg_funcs/avg.rs b/native/spark-expr/src/agg_funcs/avg.rs index 3760b42504..24a9a30991 100644 --- a/native/spark-expr/src/agg_funcs/avg.rs +++ b/native/spark-expr/src/agg_funcs/avg.rs @@ -28,7 +28,7 @@ use datafusion::logical_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature, }; use datafusion::physical_expr::expressions::format_state_name; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; use arrow::array::ArrowNativeTypeOp; use datafusion::logical_expr::function::{AccumulatorArgs, StateFieldsArgs}; @@ -67,11 +67,6 @@ impl Avg { } impl AggregateUDFImpl for Avg { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { // All numeric types use Float64 accumulation after casting match (&self.input_data_type, &self.result_data_type) { @@ -239,7 +234,7 @@ where impl GroupsAccumulator for AvgGroupsAccumulator where T: ArrowNumericType + Send, - F: Fn(T::Native, i64) -> Result + Send, + F: Fn(T::Native, i64) -> Result + Send + 'static, { fn update_batch( &mut self, diff --git a/native/spark-expr/src/agg_funcs/avg_decimal.rs b/native/spark-expr/src/agg_funcs/avg_decimal.rs index 05d74e1f99..a71bbe44ea 100644 --- a/native/spark-expr/src/agg_funcs/avg_decimal.rs +++ b/native/spark-expr/src/agg_funcs/avg_decimal.rs @@ -28,7 +28,7 @@ use datafusion::logical_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature, }; use datafusion::physical_expr::expressions::format_state_name; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; use crate::utils::{build_bool_state, is_valid_decimal_precision, unlikely}; use crate::{decimal_sum_overflow_error, EvalMode, SparkErrorWithContext}; @@ -108,11 +108,6 @@ impl AvgDecimal { } impl AggregateUDFImpl for AvgDecimal { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { match (&self.sum_data_type, &self.result_data_type) { (Decimal128(sum_precision, sum_scale), Decimal128(target_precision, target_scale)) => { diff --git a/native/spark-expr/src/agg_funcs/correlation.rs b/native/spark-expr/src/agg_funcs/correlation.rs index 7e1d85cf78..189b76c863 100644 --- a/native/spark-expr/src/agg_funcs/correlation.rs +++ b/native/spark-expr/src/agg_funcs/correlation.rs @@ -18,7 +18,7 @@ use arrow::array::{Array, ArrayRef, BooleanArray, Float64Array}; use arrow::compute::{and, is_not_null}; use arrow::datatypes::{DataType, Field, FieldRef}; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; use crate::agg_funcs::covariance::{CovarianceAccumulator, CovarianceGroupsAccumulator}; use crate::agg_funcs::stddev::StddevAccumulator; @@ -26,7 +26,6 @@ use crate::agg_funcs::variance::VarianceGroupsAccumulator; use arrow::compute::filter; use datafusion::common::{Result, ScalarValue}; use datafusion::logical_expr::function::{AccumulatorArgs, StateFieldsArgs}; -use datafusion::logical_expr::type_coercion::aggregates::NUMERICS; use datafusion::logical_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, Volatility, }; @@ -51,18 +50,16 @@ impl Correlation { assert!(matches!(data_type, DataType::Float64)); Self { name: name.into(), - signature: Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable), + signature: Signature::exact( + vec![DataType::Float64, DataType::Float64], + Volatility::Immutable, + ), null_on_divide_by_zero, } } } impl AggregateUDFImpl for Correlation { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { &self.name } diff --git a/native/spark-expr/src/agg_funcs/covariance.rs b/native/spark-expr/src/agg_funcs/covariance.rs index 276121c11b..e5b77ad33b 100644 --- a/native/spark-expr/src/agg_funcs/covariance.rs +++ b/native/spark-expr/src/agg_funcs/covariance.rs @@ -23,13 +23,11 @@ use arrow::compute::cast; use arrow::datatypes::{DataType, Field, FieldRef, Float64Type}; use datafusion::common::{downcast_value, unwrap_or_internal_err, Result, ScalarValue}; use datafusion::logical_expr::function::{AccumulatorArgs, StateFieldsArgs}; -use datafusion::logical_expr::type_coercion::aggregates::NUMERICS; use datafusion::logical_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, Volatility, }; use datafusion::physical_expr::expressions::format_state_name; use datafusion::physical_expr::expressions::StatsType; -use std::any::Any; use std::mem::size_of; use std::sync::Arc; @@ -66,7 +64,10 @@ impl Covariance { assert!(matches!(data_type, DataType::Float64)); Self { name: name.into(), - signature: Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable), + signature: Signature::exact( + vec![DataType::Float64, DataType::Float64], + Volatility::Immutable, + ), stats_type, null_on_divide_by_zero, } @@ -74,11 +75,6 @@ impl Covariance { } impl AggregateUDFImpl for Covariance { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { &self.name } diff --git a/native/spark-expr/src/agg_funcs/stddev.rs b/native/spark-expr/src/agg_funcs/stddev.rs index 4f971a0dfc..3389a86af6 100644 --- a/native/spark-expr/src/agg_funcs/stddev.rs +++ b/native/spark-expr/src/agg_funcs/stddev.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::{any::Any, sync::Arc}; +use std::sync::Arc; use crate::agg_funcs::variance::{VarianceAccumulator, VarianceGroupsAccumulator}; use arrow::array::{ArrayRef, AsArray, BooleanArray, Float64Array}; @@ -78,11 +78,6 @@ impl Stddev { } impl AggregateUDFImpl for Stddev { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { &self.name } diff --git a/native/spark-expr/src/agg_funcs/sum_decimal.rs b/native/spark-expr/src/agg_funcs/sum_decimal.rs index 180e884bf0..4ddf13c5d6 100644 --- a/native/spark-expr/src/agg_funcs/sum_decimal.rs +++ b/native/spark-expr/src/agg_funcs/sum_decimal.rs @@ -27,7 +27,7 @@ use datafusion::logical_expr::Volatility::Immutable; use datafusion::logical_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature, }; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; #[derive(Debug)] pub struct SumDecimal { @@ -99,10 +99,6 @@ impl SumDecimal { } impl AggregateUDFImpl for SumDecimal { - fn as_any(&self) -> &dyn Any { - self - } - fn accumulator(&self, _args: AccumulatorArgs) -> DFResult> { Ok(Box::new(SumDecimalAccumulator::new( self.precision, diff --git a/native/spark-expr/src/agg_funcs/sum_int.rs b/native/spark-expr/src/agg_funcs/sum_int.rs index 5ce6066772..59a4371772 100644 --- a/native/spark-expr/src/agg_funcs/sum_int.rs +++ b/native/spark-expr/src/agg_funcs/sum_int.rs @@ -29,7 +29,7 @@ use datafusion::logical_expr::Volatility::Immutable; use datafusion::logical_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature, }; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; #[derive(Debug, PartialEq, Eq, Hash)] pub struct SumInteger { @@ -52,10 +52,6 @@ impl SumInteger { } impl AggregateUDFImpl for SumInteger { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "sum" } diff --git a/native/spark-expr/src/agg_funcs/variance.rs b/native/spark-expr/src/agg_funcs/variance.rs index ebf48c82f2..f1841729cf 100644 --- a/native/spark-expr/src/agg_funcs/variance.rs +++ b/native/spark-expr/src/agg_funcs/variance.rs @@ -26,7 +26,6 @@ use datafusion::logical_expr::{ }; use datafusion::physical_expr::expressions::format_state_name; use datafusion::physical_expr::expressions::StatsType; -use std::any::Any; use std::mem::size_of; use std::sync::Arc; @@ -72,11 +71,6 @@ impl Variance { } impl AggregateUDFImpl for Variance { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { &self.name } diff --git a/native/spark-expr/src/array_funcs/array_compact.rs b/native/spark-expr/src/array_funcs/array_compact.rs index 5795ec64f8..18e28b9afa 100644 --- a/native/spark-expr/src/array_funcs/array_compact.rs +++ b/native/spark-expr/src/array_funcs/array_compact.rs @@ -33,7 +33,6 @@ use datafusion::common::{exec_err, utils::take_function_args, Result}; use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; -use std::any::Any; use std::sync::Arc; #[derive(Debug, PartialEq, Eq, Hash)] @@ -56,10 +55,6 @@ impl SparkArrayCompact { } impl ScalarUDFImpl for SparkArrayCompact { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "spark_array_compact" } diff --git a/native/spark-expr/src/array_funcs/array_insert.rs b/native/spark-expr/src/array_funcs/array_insert.rs index bce00483bc..e638c440fd 100644 --- a/native/spark-expr/src/array_funcs/array_insert.rs +++ b/native/spark-expr/src/array_funcs/array_insert.rs @@ -30,7 +30,6 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::hash::Hash; use std::{ - any::Any, fmt::{Debug, Display, Formatter}, sync::Arc, }; @@ -92,10 +91,6 @@ impl ArrayInsert { } impl PhysicalExpr for ArrayInsert { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/array_funcs/array_position.rs b/native/spark-expr/src/array_funcs/array_position.rs index dbcd8615df..a5841b27d3 100644 --- a/native/spark-expr/src/array_funcs/array_position.rs +++ b/native/spark-expr/src/array_funcs/array_position.rs @@ -28,7 +28,6 @@ use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; use num::Float; -use std::any::Any; use std::sync::Arc; /// Spark array_position() function that returns the 1-based position of an element in an array. @@ -313,10 +312,6 @@ impl SparkArrayPositionFunc { } impl ScalarUDFImpl for SparkArrayPositionFunc { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "spark_array_position" } diff --git a/native/spark-expr/src/array_funcs/array_slice.rs b/native/spark-expr/src/array_funcs/array_slice.rs index ec6ac46b03..e9dc05330b 100644 --- a/native/spark-expr/src/array_funcs/array_slice.rs +++ b/native/spark-expr/src/array_funcs/array_slice.rs @@ -34,7 +34,6 @@ use datafusion::logical_expr::{ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; -use std::any::Any; use std::sync::Arc; #[derive(Debug, PartialEq, Eq, Hash)] @@ -57,10 +56,6 @@ impl SparkArraySlice { } impl ScalarUDFImpl for SparkArraySlice { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "spark_array_slice" } diff --git a/native/spark-expr/src/array_funcs/arrays_overlap.rs b/native/spark-expr/src/array_funcs/arrays_overlap.rs index 662186e614..889a0314df 100644 --- a/native/spark-expr/src/array_funcs/arrays_overlap.rs +++ b/native/spark-expr/src/array_funcs/arrays_overlap.rs @@ -39,7 +39,6 @@ use datafusion::common::{exec_err, utils::take_function_args, Result, ScalarValu use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; -use std::any::Any; use std::sync::Arc; #[derive(Debug, PartialEq, Eq, Hash)] @@ -62,10 +61,6 @@ impl SparkArraysOverlap { } impl ScalarUDFImpl for SparkArraysOverlap { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "spark_arrays_overlap" } diff --git a/native/spark-expr/src/array_funcs/arrays_zip.rs b/native/spark-expr/src/array_funcs/arrays_zip.rs index 2126eb732c..6f0332d3d8 100644 --- a/native/spark-expr/src/array_funcs/arrays_zip.rs +++ b/native/spark-expr/src/array_funcs/arrays_zip.rs @@ -27,7 +27,6 @@ use datafusion::common::cast::{as_fixed_size_list_array, as_large_list_array, as use datafusion::common::{exec_err, Result, ScalarValue}; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; -use std::any::Any; use std::fmt::{Display, Formatter}; use std::sync::Arc; // TODO: Reuse functions from DF @@ -74,10 +73,6 @@ impl Display for SparkArraysZipFunc { } impl PhysicalExpr for SparkArraysZipFunc { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, input_schema: &Schema) -> Result { let fields = self.fields(input_schema)?; Ok(List(Arc::new(Field::new_list_field( diff --git a/native/spark-expr/src/array_funcs/get_array_struct_fields.rs b/native/spark-expr/src/array_funcs/get_array_struct_fields.rs index dc05a3b7f0..a873392499 100644 --- a/native/spark-expr/src/array_funcs/get_array_struct_fields.rs +++ b/native/spark-expr/src/array_funcs/get_array_struct_fields.rs @@ -27,7 +27,6 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::hash::Hash; use std::{ - any::Any, fmt::{Debug, Display, Formatter}, sync::Arc, }; @@ -75,10 +74,6 @@ impl GetArrayStructFields { } impl PhysicalExpr for GetArrayStructFields { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, input_schema: &Schema) -> DataFusionResult { let struct_field = self.child_field(input_schema)?; match self.child.data_type(input_schema)? { diff --git a/native/spark-expr/src/array_funcs/list_extract.rs b/native/spark-expr/src/array_funcs/list_extract.rs index c44237efc9..6f14d4605f 100644 --- a/native/spark-expr/src/array_funcs/list_extract.rs +++ b/native/spark-expr/src/array_funcs/list_extract.rs @@ -26,7 +26,6 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::hash::Hash; use std::{ - any::Any, fmt::{Debug, Display, Formatter}, sync::Arc, }; @@ -113,10 +112,6 @@ impl ListExtract { } impl PhysicalExpr for ListExtract { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/array_funcs/size.rs b/native/spark-expr/src/array_funcs/size.rs index 9777553341..f206b299d6 100644 --- a/native/spark-expr/src/array_funcs/size.rs +++ b/native/spark-expr/src/array_funcs/size.rs @@ -21,7 +21,6 @@ use datafusion::common::{exec_err, DataFusionError, Result as DataFusionResult, use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; use std::sync::Arc; /// Spark size() function that returns the size of arrays or maps. @@ -73,10 +72,6 @@ impl SparkSizeFunc { } impl ScalarUDFImpl for SparkSizeFunc { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "size" } diff --git a/native/spark-expr/src/bloom_filter/bloom_filter_agg.rs b/native/spark-expr/src/bloom_filter/bloom_filter_agg.rs index 2a7a4b5506..8920b30c9d 100644 --- a/native/spark-expr/src/bloom_filter/bloom_filter_agg.rs +++ b/native/spark-expr/src/bloom_filter/bloom_filter_agg.rs @@ -17,7 +17,7 @@ use arrow::datatypes::{Field, FieldRef}; use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; use crate::bloom_filter::spark_bloom_filter; use crate::bloom_filter::spark_bloom_filter::{SparkBloomFilter, SparkBloomFilterVersion}; @@ -45,7 +45,7 @@ pub struct BloomFilterAgg { #[inline] fn extract_i32_from_literal(expr: Arc) -> i32 { - match expr.as_any().downcast_ref::().unwrap().value() { + match (*expr).downcast_ref::().unwrap().value() { ScalarValue::Int64(scalar_value) => scalar_value.unwrap() as i32, _ => { unreachable!() @@ -81,10 +81,6 @@ impl BloomFilterAgg { } impl AggregateUDFImpl for BloomFilterAgg { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "bloom_filter_agg" } diff --git a/native/spark-expr/src/bloom_filter/bloom_filter_might_contain.rs b/native/spark-expr/src/bloom_filter/bloom_filter_might_contain.rs index ea246dfb25..66168444d9 100644 --- a/native/spark-expr/src/bloom_filter/bloom_filter_might_contain.rs +++ b/native/spark-expr/src/bloom_filter/bloom_filter_might_contain.rs @@ -22,7 +22,6 @@ use datafusion::error::DataFusionError; use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_plan::ColumnarValue; -use std::any::Any; use std::sync::Arc; use crate::bloom_filter::spark_bloom_filter::SparkBloomFilter; @@ -63,10 +62,6 @@ fn evaluate_bloom_filter( } impl ScalarUDFImpl for BloomFilterMightContain { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "might_contain" } diff --git a/native/spark-expr/src/comet_scalar_funcs.rs b/native/spark-expr/src/comet_scalar_funcs.rs index 62eeaa2b1d..64d97a9d50 100644 --- a/native/spark-expr/src/comet_scalar_funcs.rs +++ b/native/spark-expr/src/comet_scalar_funcs.rs @@ -38,7 +38,6 @@ use datafusion::logical_expr::{ Volatility, }; use datafusion::physical_plan::ColumnarValue; -use std::any::Any; use std::fmt::Debug; use std::sync::Arc; @@ -316,10 +315,6 @@ impl CometScalarFunction { } impl ScalarUDFImpl for CometScalarFunction { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { self.name.as_str() } diff --git a/native/spark-expr/src/conditional_funcs/if_expr.rs b/native/spark-expr/src/conditional_funcs/if_expr.rs index 6b1291fbbe..5e57c54a22 100644 --- a/native/spark-expr/src/conditional_funcs/if_expr.rs +++ b/native/spark-expr/src/conditional_funcs/if_expr.rs @@ -24,7 +24,7 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::{expressions::CaseExpr, PhysicalExpr}; use std::fmt::{Display, Formatter}; use std::hash::Hash; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; /// IfExpr is a wrapper around CaseExpr, because `IF(a, b, c)` is semantically equivalent to /// `CASE WHEN a THEN b ELSE c END`. @@ -83,11 +83,6 @@ impl IfExpr { } impl PhysicalExpr for IfExpr { - /// Return a reference to Any that can be used for down-casting - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/conversion_funcs/cast.rs b/native/spark-expr/src/conversion_funcs/cast.rs index 1f574f1231..dadcd1048e 100644 --- a/native/spark-expr/src/conversion_funcs/cast.rs +++ b/native/spark-expr/src/conversion_funcs/cast.rs @@ -63,7 +63,6 @@ use datafusion::common::{internal_err, DataFusionError, Result as DataFusionResu use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_plan::ColumnarValue; use std::{ - any::Any, fmt::{Debug, Display, Formatter}, hash::Hash, sync::Arc, @@ -740,10 +739,6 @@ impl Display for Cast { } impl PhysicalExpr for Cast { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/csv_funcs/to_csv.rs b/native/spark-expr/src/csv_funcs/to_csv.rs index f41cb7f918..01fdc901cb 100644 --- a/native/spark-expr/src/csv_funcs/to_csv.rs +++ b/native/spark-expr/src/csv_funcs/to_csv.rs @@ -23,7 +23,6 @@ use arrow::datatypes::{DataType, Schema}; use datafusion::common::Result; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; -use std::any::Any; use std::fmt::{Display, Formatter}; use std::hash::Hash; use std::sync::Arc; @@ -77,10 +76,6 @@ impl Display for ToCsv { } impl PhysicalExpr for ToCsv { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, _: &Schema) -> Result { Ok(DataType::Utf8) } diff --git a/native/spark-expr/src/datetime_funcs/date_diff.rs b/native/spark-expr/src/datetime_funcs/date_diff.rs index ca148c103a..be3c1d3552 100644 --- a/native/spark-expr/src/datetime_funcs/date_diff.rs +++ b/native/spark-expr/src/datetime_funcs/date_diff.rs @@ -22,7 +22,6 @@ use datafusion::common::{utils::take_function_args, DataFusionError, Result}; use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; use std::sync::Arc; /// Spark-compatible date_diff function. @@ -52,10 +51,6 @@ impl Default for SparkDateDiff { } impl ScalarUDFImpl for SparkDateDiff { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "date_diff" } diff --git a/native/spark-expr/src/datetime_funcs/date_from_unix_date.rs b/native/spark-expr/src/datetime_funcs/date_from_unix_date.rs index 1c88fc47ab..0e624e6472 100644 --- a/native/spark-expr/src/datetime_funcs/date_from_unix_date.rs +++ b/native/spark-expr/src/datetime_funcs/date_from_unix_date.rs @@ -21,7 +21,6 @@ use datafusion::common::{utils::take_function_args, DataFusionError, Result, Sca use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; use std::sync::Arc; /// Spark-compatible date_from_unix_date function. @@ -48,10 +47,6 @@ impl Default for SparkDateFromUnixDate { } impl ScalarUDFImpl for SparkDateFromUnixDate { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "date_from_unix_date" } diff --git a/native/spark-expr/src/datetime_funcs/date_trunc.rs b/native/spark-expr/src/datetime_funcs/date_trunc.rs index aeae18e36f..7ceb5234e1 100644 --- a/native/spark-expr/src/datetime_funcs/date_trunc.rs +++ b/native/spark-expr/src/datetime_funcs/date_trunc.rs @@ -22,7 +22,6 @@ use datafusion::common::{ use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; use crate::kernels::temporal::{date_trunc_array_fmt_dyn, date_trunc_dyn}; @@ -51,10 +50,6 @@ impl Default for SparkDateTrunc { } impl ScalarUDFImpl for SparkDateTrunc { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "date_trunc" } diff --git a/native/spark-expr/src/datetime_funcs/extract_date_part.rs b/native/spark-expr/src/datetime_funcs/extract_date_part.rs index acb7d2266e..7344a3953a 100644 --- a/native/spark-expr/src/datetime_funcs/extract_date_part.rs +++ b/native/spark-expr/src/datetime_funcs/extract_date_part.rs @@ -22,7 +22,7 @@ use datafusion::common::{internal_datafusion_err, DataFusionError}; use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::{any::Any, fmt::Debug}; +use std::fmt::Debug; macro_rules! extract_date_part { ($struct_name:ident, $fn_name:expr, $date_part_variant:ident) => { @@ -44,10 +44,6 @@ macro_rules! extract_date_part { } impl ScalarUDFImpl for $struct_name { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { $fn_name } diff --git a/native/spark-expr/src/datetime_funcs/hours.rs b/native/spark-expr/src/datetime_funcs/hours.rs index ea3ef742a4..26ec35ef25 100644 --- a/native/spark-expr/src/datetime_funcs/hours.rs +++ b/native/spark-expr/src/datetime_funcs/hours.rs @@ -32,7 +32,7 @@ use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; use num::integer::div_floor; -use std::{any::Any, fmt::Debug, sync::Arc}; +use std::{fmt::Debug, sync::Arc}; const MICROS_PER_HOUR: i64 = 3_600_000_000; @@ -56,10 +56,6 @@ impl Default for SparkHoursTransform { } impl ScalarUDFImpl for SparkHoursTransform { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "hours_transform" } diff --git a/native/spark-expr/src/datetime_funcs/make_date.rs b/native/spark-expr/src/datetime_funcs/make_date.rs index 4835680c3a..02c9160587 100644 --- a/native/spark-expr/src/datetime_funcs/make_date.rs +++ b/native/spark-expr/src/datetime_funcs/make_date.rs @@ -23,7 +23,6 @@ use datafusion::common::{utils::take_function_args, DataFusionError, Result}; use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; use std::sync::Arc; /// Spark-compatible make_date function. @@ -109,10 +108,6 @@ fn make_date(year: i32, month: i32, day: i32) -> Option { } impl ScalarUDFImpl for SparkMakeDate { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "make_date" } diff --git a/native/spark-expr/src/datetime_funcs/make_time.rs b/native/spark-expr/src/datetime_funcs/make_time.rs index 154ef3bf46..ec95d19044 100644 --- a/native/spark-expr/src/datetime_funcs/make_time.rs +++ b/native/spark-expr/src/datetime_funcs/make_time.rs @@ -22,7 +22,6 @@ use datafusion::common::{utils::take_function_args, DataFusionError, Result}; use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; use std::sync::Arc; const MICROS_PER_SECOND: i128 = 1_000_000; @@ -89,10 +88,6 @@ fn make_time(hours: i32, minutes: i32, secs_and_micros_unscaled: i128) -> Result } impl ScalarUDFImpl for SparkMakeTime { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "make_time" } diff --git a/native/spark-expr/src/datetime_funcs/next_day.rs b/native/spark-expr/src/datetime_funcs/next_day.rs index 207fc40a75..df4c2f9096 100644 --- a/native/spark-expr/src/datetime_funcs/next_day.rs +++ b/native/spark-expr/src/datetime_funcs/next_day.rs @@ -23,7 +23,6 @@ use datafusion::common::{utils::take_function_args, DataFusionError, Result}; use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; use std::sync::Arc; /// Spark-compatible `next_day(start_date, day_of_week)` function. @@ -80,10 +79,6 @@ fn next_date_for_day_of_week(days: i32, weekday: Weekday) -> Option { } impl ScalarUDFImpl for SparkNextDay { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "next_day" } diff --git a/native/spark-expr/src/datetime_funcs/seconds_to_timestamp.rs b/native/spark-expr/src/datetime_funcs/seconds_to_timestamp.rs index 2da1ac73b0..ab9388692b 100644 --- a/native/spark-expr/src/datetime_funcs/seconds_to_timestamp.rs +++ b/native/spark-expr/src/datetime_funcs/seconds_to_timestamp.rs @@ -24,7 +24,6 @@ use datafusion::common::{utils::take_function_args, DataFusionError, Result, Sca use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; -use std::any::Any; use std::sync::Arc; const MICROS_PER_SECOND: i64 = 1_000_000; @@ -61,10 +60,6 @@ impl Default for SparkSecondsToTimestamp { } impl ScalarUDFImpl for SparkSecondsToTimestamp { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "seconds_to_timestamp" } diff --git a/native/spark-expr/src/datetime_funcs/timestamp_trunc.rs b/native/spark-expr/src/datetime_funcs/timestamp_trunc.rs index 3435d3ee50..c26c5ef08e 100644 --- a/native/spark-expr/src/datetime_funcs/timestamp_trunc.rs +++ b/native/spark-expr/src/datetime_funcs/timestamp_trunc.rs @@ -23,7 +23,6 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::hash::Hash; use std::{ - any::Any, fmt::{Debug, Display, Formatter}, sync::Arc, }; @@ -85,10 +84,6 @@ impl Display for TimestampTruncExpr { } impl PhysicalExpr for TimestampTruncExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/datetime_funcs/unix_timestamp.rs b/native/spark-expr/src/datetime_funcs/unix_timestamp.rs index f8e932f396..bd62563a6b 100644 --- a/native/spark-expr/src/datetime_funcs/unix_timestamp.rs +++ b/native/spark-expr/src/datetime_funcs/unix_timestamp.rs @@ -24,7 +24,7 @@ use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; use num::integer::div_floor; -use std::{any::Any, fmt::Debug, sync::Arc}; +use std::{fmt::Debug, sync::Arc}; const MICROS_PER_SECOND: i64 = 1_000_000; @@ -46,10 +46,6 @@ impl SparkUnixTimestamp { } impl ScalarUDFImpl for SparkUnixTimestamp { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "unix_timestamp" } diff --git a/native/spark-expr/src/json_funcs/from_json.rs b/native/spark-expr/src/json_funcs/from_json.rs index 685ea3c8ec..eaca6db016 100644 --- a/native/spark-expr/src/json_funcs/from_json.rs +++ b/native/spark-expr/src/json_funcs/from_json.rs @@ -86,10 +86,6 @@ impl PartialEq for FromJson { } impl PhysicalExpr for FromJson { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/json_funcs/json_array_length.rs b/native/spark-expr/src/json_funcs/json_array_length.rs index eef879da3c..272b0b72d5 100644 --- a/native/spark-expr/src/json_funcs/json_array_length.rs +++ b/native/spark-expr/src/json_funcs/json_array_length.rs @@ -23,8 +23,6 @@ use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; - use serde::de::{IgnoredAny, SeqAccess, Visitor}; use serde::Deserializer; use std::fmt; @@ -53,10 +51,6 @@ impl JsonArrayLength { } impl ScalarUDFImpl for JsonArrayLength { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "json_array_length" } diff --git a/native/spark-expr/src/json_funcs/to_json.rs b/native/spark-expr/src/json_funcs/to_json.rs index 66f912da88..016b1315ac 100644 --- a/native/spark-expr/src/json_funcs/to_json.rs +++ b/native/spark-expr/src/json_funcs/to_json.rs @@ -88,10 +88,6 @@ impl PartialEq for ToJson { } impl PhysicalExpr for ToJson { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/jvm_udf/mod.rs b/native/spark-expr/src/jvm_udf/mod.rs index 0e3968e60a..0ca603ac9b 100644 --- a/native/spark-expr/src/jvm_udf/mod.rs +++ b/native/spark-expr/src/jvm_udf/mod.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::Arc; @@ -107,10 +106,6 @@ impl PartialEq for JvmScalarUdfExpr { impl Eq for JvmScalarUdfExpr {} impl PhysicalExpr for JvmScalarUdfExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/math_funcs/internal/checkoverflow.rs b/native/spark-expr/src/math_funcs/internal/checkoverflow.rs index f1fb9c2f02..f867fe0c4d 100644 --- a/native/spark-expr/src/math_funcs/internal/checkoverflow.rs +++ b/native/spark-expr/src/math_funcs/internal/checkoverflow.rs @@ -28,7 +28,6 @@ use std::hash::Hash; use crate::SparkError; use std::{ - any::Any, fmt::{Display, Formatter}, sync::Arc, }; @@ -91,10 +90,6 @@ impl Display for CheckOverflow { } impl PhysicalExpr for CheckOverflow { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } @@ -274,9 +269,6 @@ mod tests { } impl PhysicalExpr for ScalarChild { - fn as_any(&self) -> &dyn Any { - self - } fn data_type(&self, _: &Schema) -> datafusion::common::Result { Ok(DataType::Decimal128(self.1, self.2)) } diff --git a/native/spark-expr/src/math_funcs/internal/decimal_rescale_check.rs b/native/spark-expr/src/math_funcs/internal/decimal_rescale_check.rs index 1322404951..667f76939a 100644 --- a/native/spark-expr/src/math_funcs/internal/decimal_rescale_check.rs +++ b/native/spark-expr/src/math_funcs/internal/decimal_rescale_check.rs @@ -29,7 +29,6 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::hash::Hash; use std::{ - any::Any, fmt::{Display, Formatter}, sync::Arc, }; @@ -154,10 +153,6 @@ fn rescale_and_check( } impl PhysicalExpr for DecimalRescaleCheckOverflow { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } @@ -392,9 +387,6 @@ mod tests { } impl PhysicalExpr for ScalarChild { - fn as_any(&self) -> &dyn Any { - self - } fn data_type(&self, _: &Schema) -> datafusion::common::Result { Ok(DataType::Decimal128(self.1, self.2)) } diff --git a/native/spark-expr/src/math_funcs/internal/normalize_nan.rs b/native/spark-expr/src/math_funcs/internal/normalize_nan.rs index b3838f64f4..165ae3acc9 100644 --- a/native/spark-expr/src/math_funcs/internal/normalize_nan.rs +++ b/native/spark-expr/src/math_funcs/internal/normalize_nan.rs @@ -26,7 +26,6 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::hash::Hash; use std::{ - any::Any, fmt::{Display, Formatter}, sync::Arc, }; @@ -57,10 +56,6 @@ impl NormalizeNaNAndZero { } impl PhysicalExpr for NormalizeNaNAndZero { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/math_funcs/negative.rs b/native/spark-expr/src/math_funcs/negative.rs index a268894086..650fa401ef 100644 --- a/native/spark-expr/src/math_funcs/negative.rs +++ b/native/spark-expr/src/math_funcs/negative.rs @@ -29,7 +29,7 @@ use datafusion::{ }; use std::fmt::{Display, Formatter}; use std::hash::Hash; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; pub fn create_negate_expr( expr: Arc, @@ -96,11 +96,6 @@ impl std::fmt::Display for NegativeExpr { } impl PhysicalExpr for NegativeExpr { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, input_schema: &Schema) -> Result { self.arg.data_type(input_schema) } diff --git a/native/spark-expr/src/math_funcs/wide_decimal_binary_expr.rs b/native/spark-expr/src/math_funcs/wide_decimal_binary_expr.rs index 7ff78713be..ca4869357e 100644 --- a/native/spark-expr/src/math_funcs/wide_decimal_binary_expr.rs +++ b/native/spark-expr/src/math_funcs/wide_decimal_binary_expr.rs @@ -31,7 +31,7 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::fmt::{Display, Formatter}; use std::hash::Hash; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; /// The arithmetic operation to perform. #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] @@ -165,10 +165,6 @@ fn max_for_precision(precision: u8) -> i256 { } impl PhysicalExpr for WideDecimalBinaryExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, _input_schema: &Schema) -> Result { Ok(DataType::Decimal128( self.output_precision, diff --git a/native/spark-expr/src/nondetermenistic_funcs/monotonically_increasing_id.rs b/native/spark-expr/src/nondetermenistic_funcs/monotonically_increasing_id.rs index 49a5066a38..e7404e00c1 100644 --- a/native/spark-expr/src/nondetermenistic_funcs/monotonically_increasing_id.rs +++ b/native/spark-expr/src/nondetermenistic_funcs/monotonically_increasing_id.rs @@ -20,7 +20,6 @@ use arrow::datatypes::{DataType, Schema}; use datafusion::common::Result; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; -use std::any::Any; use std::fmt::{Debug, Display, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::atomic::{AtomicI64, Ordering}; @@ -66,10 +65,6 @@ impl Hash for MonotonicallyIncreasingId { } impl PhysicalExpr for MonotonicallyIncreasingId { - fn as_any(&self) -> &dyn Any { - self - } - fn evaluate(&self, batch: &RecordBatch) -> Result { let start = self .current_offset diff --git a/native/spark-expr/src/nondetermenistic_funcs/rand.rs b/native/spark-expr/src/nondetermenistic_funcs/rand.rs index e23a83d84e..8f07f37344 100644 --- a/native/spark-expr/src/nondetermenistic_funcs/rand.rs +++ b/native/spark-expr/src/nondetermenistic_funcs/rand.rs @@ -23,7 +23,6 @@ use arrow::datatypes::{DataType, Schema}; use datafusion::common::Result; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; -use std::any::Any; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::{Arc, Mutex}; @@ -120,10 +119,6 @@ impl Hash for RandExpr { } impl PhysicalExpr for RandExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, _input_schema: &Schema) -> Result { Ok(DataType::Float64) } diff --git a/native/spark-expr/src/nondetermenistic_funcs/randn.rs b/native/spark-expr/src/nondetermenistic_funcs/randn.rs index 40fafedc20..7d50d24811 100644 --- a/native/spark-expr/src/nondetermenistic_funcs/randn.rs +++ b/native/spark-expr/src/nondetermenistic_funcs/randn.rs @@ -22,7 +22,6 @@ use arrow::array::RecordBatch; use arrow::datatypes::{DataType, Schema}; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; -use std::any::Any; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::{Arc, Mutex}; @@ -131,10 +130,6 @@ impl Hash for RandnExpr { } impl PhysicalExpr for RandnExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, _input_schema: &Schema) -> datafusion::common::Result { Ok(DataType::Float64) } diff --git a/native/spark-expr/src/predicate_funcs/rlike.rs b/native/spark-expr/src/predicate_funcs/rlike.rs index ed5970a6a2..ee005dd1ac 100644 --- a/native/spark-expr/src/predicate_funcs/rlike.rs +++ b/native/spark-expr/src/predicate_funcs/rlike.rs @@ -25,7 +25,6 @@ use datafusion::common::{internal_err, Result, ScalarValue}; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_plan::ColumnarValue; use regex::Regex; -use std::any::Any; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::Arc; @@ -102,10 +101,6 @@ impl Display for RLike { } impl PhysicalExpr for RLike { - fn as_any(&self) -> &dyn Any { - self - } - fn data_type(&self, _input_schema: &Schema) -> Result { Ok(DataType::Boolean) } diff --git a/native/spark-expr/src/string_funcs/contains.rs b/native/spark-expr/src/string_funcs/contains.rs index bc34ce9cba..537227efdf 100644 --- a/native/spark-expr/src/string_funcs/contains.rs +++ b/native/spark-expr/src/string_funcs/contains.rs @@ -27,7 +27,6 @@ use datafusion::common::{exec_err, Result, ScalarValue}; use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use std::any::Any; use std::sync::Arc; /// Spark-optimized contains function. @@ -53,10 +52,6 @@ impl SparkContains { } impl ScalarUDFImpl for SparkContains { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { "contains" } diff --git a/native/spark-expr/src/string_funcs/substring.rs b/native/spark-expr/src/string_funcs/substring.rs index bd36e302a6..3e3fdd4d59 100644 --- a/native/spark-expr/src/string_funcs/substring.rs +++ b/native/spark-expr/src/string_funcs/substring.rs @@ -26,7 +26,6 @@ use arrow::record_batch::RecordBatch; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::{ - any::Any, fmt::{Display, Formatter}, hash::Hash, sync::Arc, @@ -70,10 +69,6 @@ impl Display for SubstringExpr { } impl PhysicalExpr for SubstringExpr { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/struct_funcs/create_named_struct.rs b/native/spark-expr/src/struct_funcs/create_named_struct.rs index 70e03ad0c0..1a63cb1cc9 100644 --- a/native/spark-expr/src/struct_funcs/create_named_struct.rs +++ b/native/spark-expr/src/struct_funcs/create_named_struct.rs @@ -22,7 +22,6 @@ use datafusion::common::Result as DataFusionResult; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::{ - any::Any, fmt::{Display, Formatter}, hash::Hash, sync::Arc, @@ -53,10 +52,6 @@ impl CreateNamedStruct { } impl PhysicalExpr for CreateNamedStruct { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/struct_funcs/get_struct_field.rs b/native/spark-expr/src/struct_funcs/get_struct_field.rs index b82560a4e7..62447624f5 100644 --- a/native/spark-expr/src/struct_funcs/get_struct_field.rs +++ b/native/spark-expr/src/struct_funcs/get_struct_field.rs @@ -23,7 +23,6 @@ use datafusion::common::{DataFusionError, Result as DataFusionResult, ScalarValu use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr::PhysicalExpr; use std::{ - any::Any, fmt::{Display, Formatter}, hash::Hash, sync::Arc, @@ -84,10 +83,6 @@ impl GetStructField { } impl PhysicalExpr for GetStructField { - fn as_any(&self) -> &dyn Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/unbound.rs b/native/spark-expr/src/unbound.rs index cf0adafa91..69187dbc02 100644 --- a/native/spark-expr/src/unbound.rs +++ b/native/spark-expr/src/unbound.rs @@ -59,11 +59,6 @@ impl std::fmt::Display for UnboundColumn { } impl PhysicalExpr for UnboundColumn { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn std::any::Any { - self - } - fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(self, f) } diff --git a/native/spark-expr/src/url_funcs/parse_url.rs b/native/spark-expr/src/url_funcs/parse_url.rs index 3f78d9ce71..25d4ca94fe 100644 --- a/native/spark-expr/src/url_funcs/parse_url.rs +++ b/native/spark-expr/src/url_funcs/parse_url.rs @@ -21,7 +21,6 @@ //! which diverges from Spark's java.net.URI (RFC 3986) on several edge cases. //! This module uses RFC 3986 Appendix B regex parsing to match Spark exactly. -use std::any::Any; use std::sync::{Arc, LazyLock}; use arrow::array::{ @@ -182,9 +181,6 @@ impl Default for CometParseUrl { } impl ScalarUDFImpl for CometParseUrl { - fn as_any(&self) -> &dyn Any { - self - } fn name(&self) -> &str { "parse_url" } @@ -220,9 +216,6 @@ impl Default for CometTryParseUrl { } impl ScalarUDFImpl for CometTryParseUrl { - fn as_any(&self) -> &dyn Any { - self - } fn name(&self) -> &str { "try_parse_url" } diff --git a/spark/src/main/scala/org/apache/comet/CometConf.scala b/spark/src/main/scala/org/apache/comet/CometConf.scala index aabd64b9b3..3c7d60d613 100644 --- a/spark/src/main/scala/org/apache/comet/CometConf.scala +++ b/spark/src/main/scala/org/apache/comet/CometConf.scala @@ -291,9 +291,10 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.exec.sortMergeJoinWithJoinFilter.enabled") .category(CATEGORY_ENABLE_EXEC) - .doc("Experimental support for Sort Merge Join with filter") + .doc("Support for Sort Merge Join with filter. " + + "Deprecated: this config will be removed in a future release.") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val COMET_TRACING_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.tracing.enabled") .category(CATEGORY_TUNING) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 7d5f13bb1c..6349de6f30 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -134,7 +134,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { classOf[Logarithm] -> CometLogarithm, classOf[Multiply] -> CometMultiply, classOf[Pi] -> CometScalarFunction("pi"), - classOf[Pow] -> CometScalarFunction("pow"), + classOf[Pow] -> CometPow, classOf[Rand] -> CometRand, classOf[Randn] -> CometRandn, classOf[Remainder] -> CometRemainder, diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala index 2ce75ccc0d..bc6214a188 100644 --- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala @@ -44,7 +44,8 @@ private object CometGetDateField extends Enumeration { // 2 = Monday, ..., 7 = Saturday). val DayOfWeek: Value = Value("dow") val DayOfYear: Value = Value("doy") - val WeekDay: Value = Value("isodow") // day of the week where Monday is 0 + // Datafusion `isodow` is 1..=7 with Monday=1; Spark `WeekDay` is 0..=6 with Monday=0. + val WeekDay: Value = Value("isodow") val WeekOfYear: Value = Value("week") val Quarter: Value = Value("quarter") } @@ -144,7 +145,27 @@ object CometWeekDay extends CometExpressionSerde[WeekDay] with CometExprGetDateF expr: WeekDay, inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { - getDateField(expr, CometGetDateField.WeekDay, inputs, binding) + // Datafusion `isodow` is 1..=7 with Monday=1, but Spark `WeekDay` is 0..=6 with Monday=0, + // so subtract 1 from the result of datepart(isodow, ...). + // TODO: fix upstream to avoid substraction + // https://github.com/apache/datafusion/issues/22599 + val optExpr = getDateField(expr, CometGetDateField.WeekDay, inputs, binding) + .zip(exprToProtoInternal(Literal(1), inputs, binding)) + .map { case (left, right) => + Expr + .newBuilder() + .setSubtract( + ExprOuterClass.MathExpr + .newBuilder() + .setLeft(left) + .setRight(right) + .setEvalMode(ExprOuterClass.EvalMode.LEGACY) + .setReturnType(serializeDataType(IntegerType).get) + .build()) + .build() + } + .headOption + optExprWithFallbackReason(optExpr, expr, expr.child) } } diff --git a/spark/src/main/scala/org/apache/comet/serde/math.scala b/spark/src/main/scala/org/apache/comet/serde/math.scala index ce53cce023..d8eeb3994d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/math.scala +++ b/spark/src/main/scala/org/apache/comet/serde/math.scala @@ -19,7 +19,7 @@ package org.apache.comet.serde -import org.apache.spark.sql.catalyst.expressions.{Abs, Add, Atan2, Attribute, BRound, Ceil, CheckOverflow, Conv, Expression, Floor, Hex, Hypot, If, LessThanOrEqual, Literal, Log, Log10, Log1p, Log2, Logarithm, NaNvl, Pmod, UnaryPositive, Unhex, WidthBucket} +import org.apache.spark.sql.catalyst.expressions.{Abs, Add, Atan2, Attribute, BRound, Ceil, CheckOverflow, Conv, Expression, Floor, Hex, Hypot, If, LessThanOrEqual, Literal, Log, Log10, Log1p, Log2, Logarithm, NaNvl, Pmod, Pow, UnaryPositive, Unhex, WidthBucket} import org.apache.spark.sql.types.{DecimalType, DoubleType, NumericType} import org.apache.comet.CometSparkSessionExtensions.withFallbackReason @@ -196,6 +196,27 @@ object CometAbs extends CometExpressionSerde[Abs] with MathExprBase { } } +object CometPow extends CometExpressionSerde[Pow] { + + // https://github.com/apache/datafusion/issues/22598 + val unsupportedReason: String = "Power has correctness issues" + + override def getUnsupportedReasons(): Seq[String] = Seq(unsupportedReason) + + override def getSupportLevel(expr: Pow): SupportLevel = + Unsupported(Some(unsupportedReason)) + + override def convert( + expr: Pow, + inputs: Seq[Attribute], + binding: Boolean): Option[ExprOuterClass.Expr] = { + val leftExpr = exprToProtoInternal(expr.left, inputs, binding) + val rightExpr = exprToProtoInternal(expr.right, inputs, binding) + val optExpr = scalarFunctionExprToProto("pow", leftExpr, rightExpr) + optExprWithFallbackReason(optExpr, expr, expr.left, expr.right) + } +} + sealed trait MathExprBase { protected def nullIfNegative(expression: Expression): Expression = { val zero = Literal.default(expression.dataType) diff --git a/spark/src/test/resources/sql-tests/expressions/math/pow.sql b/spark/src/test/resources/sql-tests/expressions/math/pow.sql index 78e4c3e6c4..f1111167ba 100644 --- a/spark/src/test/resources/sql-tests/expressions/math/pow.sql +++ b/spark/src/test/resources/sql-tests/expressions/math/pow.sql @@ -19,19 +19,23 @@ statement CREATE TABLE test_pow(base double, exp double) USING parquet statement -INSERT INTO test_pow VALUES (2.0, 3.0), (0.0, 0.0), (-1.0, 2.0), (-1.0, 0.5), (2.0, -1.0), (NULL, 2.0), (2.0, NULL), (cast('NaN' as double), 2.0), (cast('Infinity' as double), 2.0), (2.0, cast('Infinity' as double)) +INSERT INTO test_pow VALUES (0.0, -1), (2.0, 3.0), (0.0, 0.0), (-1.0, 2.0), (-1.0, 0.5), (2.0, -1.0), (NULL, 2.0), (2.0, NULL), (cast('NaN' as double), 2.0), (cast('Infinity' as double), 2.0), (2.0, cast('Infinity' as double)) -query tolerance=1e-6 +-- query tolerance=1e-6 +query expect_fallback(Power has correctness issues) SELECT pow(base, exp) FROM test_pow -- column + literal -query tolerance=1e-6 +-- query tolerance=1e-6 +query expect_fallback(Power has correctness issues) SELECT pow(base, 2.0) FROM test_pow -- literal + column -query tolerance=1e-6 +-- query tolerance=1e-6 +query expect_fallback(Power has correctness issues) SELECT pow(2.0, exp) FROM test_pow -- literal + literal -query tolerance=1e-6 +-- query tolerance=1e-6 +query expect_fallback(Power has correctness issues) SELECT pow(2.0, 3.0), pow(0.0, 0.0), pow(-1.0, 2.0), pow(NULL, 2.0) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 3bfeccae26..6c492f9029 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -1431,7 +1431,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } withParquetTable(doubleValues.flatMap(m => doubleValues.map(n => (m, n))), "tbl") { // expressions with two args - for (expr <- Seq("atan2", "pow")) { + for (expr <- Seq("atan2")) { val (_, cometPlan) = checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1, _2) FROM tbl")) val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => diff --git a/spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala index 96deae66e0..f01d5e2109 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala @@ -657,6 +657,42 @@ class CometJoinSuite extends CometTestBase { } } + // Reproducer for SPARK-43113: full outer SMJ with a join filter that references + // a nullable column should not match when the filter evaluates to NULL. + test("SPARK-43113: Full outer SMJ with NULL in join filter") { + withTempView("l", "r") { + // testData2: (a, b) — all non-null + Seq((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)) + .toDF("a", "b") + .createOrReplaceTempView("l") + + // testData3: (a, b) — b is nullable + Seq((1, None), (2, Some(2))) + .toDF("a", "b") + .createOrReplaceTempView("r") + + val query = + """select /*+ MERGE(r) */ * + |from l + |full outer join r + |on l.a = r.a + |and l.b < (r.b + 1) + |and l.b < (r.a + 1)""".stripMargin + + val expected = Seq( + (Some(1), Some(1), None, None), + (Some(1), Some(2), None, None), + (None, None, Some(1), None), + (Some(2), Some(1), Some(2), Some(2)), + (Some(2), Some(2), Some(2), Some(2)), + (Some(3), Some(1), None, None), + (Some(3), Some(2), None, None)).toDF("a", "b", "a", "b") + + val df = sql(query) + checkAnswer(df, expected) + } + } + test("Broadcast exchange respects AQE shuffle partition coalescing") { // When a shuffle feeds into a broadcast exchange, AQE may coalesce the shuffle // partitions. The broadcast collect should execute through the AQEShuffleReadExec