diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 86f1225..7f570d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,7 @@ jobs: - "png" - "pdf" - "png,pdf" + - "png,net" steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable diff --git a/.gitignore b/.gitignore index 0a729bb..7b5fc2f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ Cargo.lock # Test outputs output.png output.pdf +remote_image.png # OS files .DS_Store diff --git a/CHANGELOG.md b/CHANGELOG.md index ab2fb58..098ff56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,32 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- Remote resource loading via the optional `net` feature (enabled by default), + powered by [`blitz-net`]. Images, linked stylesheets, `@import`s, and web + fonts referenced by the HTML are now fetched and applied before rendering. + - Supported URL schemes: `http(s)://`, `file://`, and `data:`. + - `render` remains synchronous: it transparently uses a shared, process-wide + Tokio runtime (created once and reused across calls) and blocks until all + resources are loaded (subject to an internal timeout). +- `remote_image` example demonstrating remote image loading. +- Offline integration test (`tests/render_net.rs`) covering resource loading + via `data:` URIs. + +### Changed + +- Building with the `net` feature disabled pulls in no async or TLS dependencies. + +### Notes + +- The `net` feature uses `reqwest` with the native TLS backend, which requires + OpenSSL development headers at build time. 
+ +[`blitz-net`]: https://crates.io/crates/blitz-net + ## [0.1.0] - 2024-01-29 ### Added diff --git a/Cargo.toml b/Cargo.toml index 053979a..5c266bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,9 +11,11 @@ keywords = ["html", "pdf", "png", "render", "headless"] categories = ["rendering", "graphics", "multimedia::images"] [features] -default = ["png", "pdf"] +default = ["png", "pdf", "net"] png = ["dep:anyrender", "dep:anyrender_vello_cpu", "dep:png"] pdf = ["dep:krilla", "dep:stylo", "dep:parley", "dep:linebender_resource_handle"] +# Fetch remote resources (images, stylesheets, fonts) over http(s)/file/data URLs +net = ["dep:blitz-net", "dep:tokio"] [dependencies] # Core HTML/CSS parsing and layout (always required) @@ -33,6 +35,10 @@ stylo = { version = "0.8", optional = true } # For accessing computed styles in parley = { version = "0.6", optional = true } # For text layout types linebender_resource_handle = { version = "0.1", optional = true } # For font data types +# Network resource loading (optional, enabled by default) +blitz-net = { version = "0.2.1", optional = true } +tokio = { version = "1", optional = true, features = ["rt-multi-thread", "sync", "time"] } + # Common dependencies thiserror = "2" @@ -52,3 +58,8 @@ path = "examples/simple.rs" [[example]] name = "from_file" path = "examples/from_file.rs" + +[[example]] +name = "remote_image" +path = "examples/remote_image.rs" +required-features = ["net"] diff --git a/README.md b/README.md index 84af93c..563ab5f 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ A Chromium-free HTML rendering engine for generating PNG and PDF outputs in pure - **PNG output** — High-quality raster images via CPU-based rendering - **PDF output** — Vector PDF documents with embedded fonts - **Modern CSS** — Flexbox, Grid, and common CSS properties via Stylo (Firefox's CSS engine) +- **Remote resources** — Fetch images, stylesheets, and web fonts over `http(s)`/`file`/`data` URLs (optional `net` feature, enabled by 
default) - **Simple API** — Single function call to render HTML to bytes ## Installation @@ -23,9 +24,18 @@ Or with specific features: ```toml [dependencies] +# Just PNG, no networking (no async/TLS dependencies) hyper-render = { version = "0.1", default-features = false, features = ["png"] } ``` +Available features (all enabled by default): + +| Feature | Description | +|---------|-------------| +| `png` | PNG output via the Vello CPU rasterizer | +| `pdf` | PDF output via Krilla | +| `net` | Fetch remote resources referenced by the HTML | + ## Quick Start ```rust @@ -90,6 +100,33 @@ let config = Config::new() | `OutputFormat::Png` | ✅ Full | Raster image via Vello CPU renderer | | `OutputFormat::Pdf` | ✅ Full | Vector PDF with embedded fonts and backgrounds | +### Network Resources + +When the `net` feature is enabled (the default), resources referenced by the +HTML — `<img>`, `<link rel="stylesheet">`, `@import`, and `@font-face` — are +fetched before rendering. The following URL schemes are supported: + +- `http://` and `https://` — fetched over the network +- `file://` — read from the local filesystem +- `data:` — decoded inline + +```rust +use hyper_render::{render, Config}; + +let html = r#"<img src="https://example.com/logo.png">"#; +let png = render(html, Config::default())?; // logo is fetched and painted +# Ok::<(), hyper_render::Error>(()) +``` + +Resource loading is synchronous from the caller's perspective: `render` blocks +until all referenced resources have been fetched and applied (subject to an +internal timeout), so no async runtime is required by your code. Because it +blocks the calling thread, call `render` from synchronous code; inside an async +runtime, wrap it in `tokio::task::spawn_blocking`, as blocking from an async +context can panic. + +> **TLS note:** networking uses `reqwest` with the native TLS backend, which +> requires OpenSSL development headers at build time (e.g. `libssl-dev` on +> Debian/Ubuntu, `openssl-devel` on Fedora). To build without any networking, +> async, or TLS dependencies, disable the `net` feature. + ## Try It Yourself ### Clone and Build @@ -108,6 +145,12 @@ cargo run --example simple This generates `output.png` and `output.pdf` in the current directory. 
+To see remote resource loading in action (requires network + the `net` feature): + +```bash +cargo run --example remote_image +``` + ### Render Your Own HTML ```bash @@ -235,8 +278,7 @@ cargo run --example from_file -- input.html output.png 2>/dev/null ## Limitations - **JavaScript** — Not supported (by design) -- **Web fonts** — System fonts only; `@font-face` not yet supported -- **Images** — External image loading not yet implemented +- **Networking** — Remote resource loading requires the `net` feature; with it disabled, no external resources are loaded (including `data:` URIs), so only content written directly in the HTML is rendered - **Some CSS** — Advanced features like `position: sticky`, complex transforms may not work ## Dependencies @@ -247,6 +289,7 @@ Core rendering stack: - [Taffy](https://github.com/DioxusLabs/taffy) — Flexbox/Grid layout - [Vello](https://github.com/linebender/vello) — 2D graphics (CPU renderer) - [Krilla](https://github.com/LaurenzV/krilla) — PDF generation +- [blitz-net](https://github.com/DioxusLabs/blitz) — Resource fetching (`net` feature) ## License diff --git a/examples/remote_image.rs b/examples/remote_image.rs new file mode 100644 index 0000000..1b83cdc --- /dev/null +++ b/examples/remote_image.rs @@ -0,0 +1,43 @@ +//! Example demonstrating remote resource loading (the `net` feature). +//! +//! With the `net` feature enabled (on by default), hyper-render fetches +//! external resources referenced by the HTML — here an image loaded over +//! HTTPS — before rendering. +//! +//! Run with: `cargo run --example remote_image` +//! +//! Note: this example requires network access and the `net` feature. Build +//! without networking using `--no-default-features --features png`. + +use hyper_render::{render, Config}; + +fn main() -> Result<(), Box> { + let html = r#" + + + + + + +
+

Fetched over HTTPS

+ Remote image +
+ + + "#; + + println!("Rendering (fetching remote image)..."); + let config = Config::new().size(440, 360).scale(2.0); + let png_bytes = render(html, config)?; + + std::fs::write("remote_image.png", &png_bytes)?; + println!("Saved remote_image.png ({} bytes)", png_bytes.len()); + + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index 50f66a8..7c954ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,13 +54,19 @@ mod config; mod error; +#[cfg(feature = "net")] +mod net; mod render; pub use config::{ColorScheme, Config, OutputFormat}; pub use error::{Error, Result}; +use std::sync::Arc; + +use blitz_dom::net::Resource; use blitz_dom::DocumentConfig; use blitz_html::HtmlDocument; +use blitz_traits::net::NetProvider; use blitz_traits::shell::Viewport; /// Render HTML content to the specified output format. @@ -104,12 +110,34 @@ pub fn render(html: &str, config: Config) -> Result> { // Validate configuration config.validate()?; + // Set up networking so external resources (images, stylesheets, fonts) can + // be fetched. The provider must exist before parsing so requests triggered + // during parsing/layout are dispatched. + #[cfg(feature = "net")] + let mut net_env = net::NetEnv::new()?; + + let net_provider: Option>> = { + #[cfg(feature = "net")] + { + Some(net_env.provider()) + } + #[cfg(not(feature = "net"))] + { + None + } + }; + // Parse HTML and create document - let mut document = create_document(html, &config)?; + let mut document = create_document(html, &config, net_provider)?; - // Resolve styles and compute layout + // Resolve styles and compute layout. This dispatches the initial batch of + // resource requests for linked stylesheets, images, etc. document.resolve(0.0); + // Block until all fetched resources have been applied to the document. 
+ #[cfg(feature = "net")] + net_env.load_resources(&mut document); + // Render to the specified format match config.format { OutputFormat::Png => render::png::render_to_png(&document, &config), @@ -156,7 +184,11 @@ pub fn render_to_pdf(html: &str, config: Config) -> Result> { } /// Create and configure a Blitz document from HTML. -fn create_document(html: &str, config: &Config) -> Result { +fn create_document( + html: &str, + config: &Config, + net_provider: Option>>, +) -> Result { let viewport = Viewport::new( config.width, config.height, @@ -166,6 +198,7 @@ fn create_document(html: &str, config: &Config) -> Result { let doc_config = DocumentConfig { viewport: Some(viewport), + net_provider, ..Default::default() }; diff --git a/src/net.rs b/src/net.rs new file mode 100644 index 0000000..1346944 --- /dev/null +++ b/src/net.rs @@ -0,0 +1,126 @@ +//! Network resource loading via [`blitz_net`]. +//! +//! Blitz fetches external resources (images, linked stylesheets, web fonts, +//! CSS `@import`s, etc.) asynchronously through a [`NetProvider`]. This module +//! wraps `blitz-net`'s [`Provider`] together with a process-wide Tokio runtime +//! and provides a synchronous "fetch everything, then return" helper so the rest +//! of the crate can stay fully synchronous. +//! +//! The runtime is created lazily **once** and shared across every `render` call +//! (see [`shared_runtime`]); building a runtime is comparatively expensive, so +//! reusing it keeps per-render overhead to just a channel and a provider. A +//! multi-threaded runtime is used so concurrent renders can fetch in parallel. 
+ +use std::sync::{Arc, OnceLock}; +use std::time::{Duration, Instant}; + +use blitz_dom::net::Resource; +use blitz_html::HtmlDocument; +use blitz_net::{MpscCallback, Provider}; +use blitz_traits::net::{NetProvider, SharedCallback}; +use tokio::runtime::{Handle, Runtime}; +use tokio::sync::mpsc::UnboundedReceiver; + +/// How long to wait for a single resource before re-checking whether any +/// requests are still in flight. Kept short so termination is responsive. +const POLL_INTERVAL: Duration = Duration::from_millis(100); + +/// Overall safety budget for the resource-loading phase. Guards against a hung +/// connection keeping a request in flight forever (reqwest has no default timeout). +const OVERALL_TIMEOUT: Duration = Duration::from_secs(30); + +/// Returns the process-wide Tokio runtime, creating it on first use. +/// +/// The runtime lives for the remainder of the process; reusing it avoids the +/// cost of spinning up a new runtime (and worker threads) on every render. +fn shared_runtime() -> std::io::Result<&'static Runtime> { + static RUNTIME: OnceLock = OnceLock::new(); + + if let Some(runtime) = RUNTIME.get() { + return Ok(runtime); + } + + // Build outside `get_or_init` so initialization errors can be propagated. + // On the rare init race the loser's runtime is simply dropped. + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + Ok(RUNTIME.get_or_init(|| runtime)) +} + +/// Holds a handle to the shared runtime, the `blitz-net` provider, and the +/// channel that receives parsed resources back from in-flight requests. +pub(crate) struct NetEnv { + handle: Handle, + provider: Arc>, + receiver: UnboundedReceiver<(usize, Resource)>, +} + +impl NetEnv { + /// Wire a network provider to the shared process-wide runtime. 
+ pub(crate) fn new() -> std::io::Result { + let handle = shared_runtime()?.handle().clone(); + + let (receiver, callback) = MpscCallback::new(); + let callback: SharedCallback = Arc::new(callback); + + // `Provider::new` captures `Handle::current()`, so it must be created + // while the runtime is entered. + let provider = { + let _guard = handle.enter(); + Arc::new(Provider::new(callback)) + }; + + Ok(Self { + handle, + provider, + receiver, + }) + } + + /// The provider to install on the document's [`DocumentConfig`]. + pub(crate) fn provider(&self) -> Arc> { + self.provider.clone() + } + + /// Block until every in-flight resource request has completed (or the + /// overall timeout elapses), applying each fetched resource to `document`. + /// + /// Applying a resource (e.g. a stylesheet) and re-resolving layout can + /// trigger further requests (fonts, `@import`s, background images), so this + /// loops until the provider reports no outstanding requests. + pub(crate) fn load_resources(&mut self, document: &mut HtmlDocument) { + let deadline = Instant::now() + OVERALL_TIMEOUT; + + loop { + // Apply everything that has already arrived. + let mut changed = false; + while let Ok((_doc_id, resource)) = self.receiver.try_recv() { + document.load_resource(resource); + changed = true; + } + + // Nothing ready but requests are still in flight: wait for the next. + if !changed && !self.provider.is_empty() && Instant::now() < deadline { + let handle = &self.handle; + let rx = &mut self.receiver; + let received = handle + .block_on(async { tokio::time::timeout(POLL_INTERVAL, rx.recv()).await }); + if let Ok(Some((_doc_id, resource))) = received { + document.load_resource(resource); + changed = true; + } + } + + if changed { + // New resources may have invalidated layout and/or queued more + // requests; re-resolve so those get dispatched. 
+ document.resolve(0.0); + } else if self.provider.is_empty() || Instant::now() >= deadline { + break; + } + } + + document.resolve(0.0); + } +} diff --git a/tests/render_net.rs b/tests/render_net.rs new file mode 100644 index 0000000..b24113a --- /dev/null +++ b/tests/render_net.rs @@ -0,0 +1,58 @@ +//! Integration tests for network-backed resource loading (the `net` feature). +//! +//! These deliberately use `data:` URIs so the tests run fully offline and +//! deterministically while still exercising the real `blitz-net` provider and +//! the resource-loading loop in `render`. + +#![cfg(all(feature = "net", feature = "png"))] + +use hyper_render::{render, Config}; + +/// PNG header magic bytes. +const PNG_SIGNATURE: [u8; 8] = [0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A]; + +/// Decode a PNG into `(width, height, rgba_bytes)`. +fn decode_png(data: &[u8]) -> (u32, u32, Vec) { + let decoder = png::Decoder::new(data); + let mut reader = decoder.read_info().expect("valid PNG"); + let mut buf = vec![0; reader.output_buffer_size()]; + let info = reader.next_frame(&mut buf).expect("decode PNG frame"); + buf.truncate(info.buffer_size()); + (info.width, info.height, buf) +} + +/// Sample the RGBA value at the given pixel coordinate. +fn pixel_at(data: &[u8], x: u32, y: u32) -> [u8; 4] { + let (width, _height, buf) = decode_png(data); + let idx = ((y * width + x) * 4) as usize; + [buf[idx], buf[idx + 1], buf[idx + 2], buf[idx + 3]] +} + +#[test] +fn test_external_stylesheet_via_data_uri_is_applied() { + // The linked stylesheet (a `data:` URI fetched through the net provider) + // gives `.box` its dimensions and a green background. If the resource is + // not loaded, the element has no size/background and the pixel stays blank. + let html = r#" + + + + + + +
+ + + "#; + + let config = Config::new().width(64).height(64).scale(1.0); + let bytes = render(html, config).expect("render should succeed"); + assert!(bytes.starts_with(&PNG_SIGNATURE), "output should be a PNG"); + + let [r, g, b, a] = pixel_at(&bytes, 8, 8); + assert!( + r < 40 && (100..=160).contains(&g) && b < 40 && a > 200, + "external stylesheet should paint a green box, got rgba({r},{g},{b},{a})" + ); +}