.remote-cache/kreuzberg-shared-rules/.ai-rulez/skills/polyglot-error-handling-standardization/SKILL.md
______________________________________________________________________ ## priority: critical # Polyglot Error Handling Standardization **FFI error conversion · Language-specific exceptions · Error context preservation · Safe boundaries** ## Error Conversion at FFI Boundaries Error handling is language-specific; FFI boundaries MUST convert between error models: ### Rust → Host Language **Rust errors MUST be converted to language-appropriate types at FFI boundary:** - **Rust `Result<T, E>`
npx skillsauth add kreuzberg-dev/html-to-markdown .remote-cache/kreuzberg-shared-rules/.ai-rulez/skills/polyglot-error-handling-standardization
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
FFI error conversion · Language-specific exceptions · Error context preservation · Safe boundaries
Error handling is language-specific; FFI boundaries MUST convert between error models:
Rust errors MUST be converted to language-appropriate types at FFI boundary:
Result<T, E> (sum type) → Host exception/error/nil
// Rust core (kreuzberg_core::lib.rs)
/// Parses raw document bytes into a `Document`.
///
/// Returns `Err(ParseError)` when the input cannot be parsed. The parsing
/// logic itself is elided in this example.
pub fn parse_document(data: &[u8]) -> Result<Document, ParseError> {
// ... parsing logic
}
// FFI boundary (kreuzberg_pyo3::lib.rs)
use pyo3::prelude::*;
/// Python-facing wrapper around `kreuzberg_core::parse_document`.
///
/// Converts the core `Result` into a `PyResult` at the FFI boundary: a parsed
/// document is wrapped in `PyDocument`, and a parse failure is raised in
/// Python as `ValueError` carrying the parser's message and line number.
#[pyfunction]
fn parse_document(_py: Python<'_>, data: &[u8]) -> PyResult<PyDocument> {
    kreuzberg_core::parse_document(data)
        .map(|doc| PyDocument { inner: doc })
        // PyO3 exposes Python's built-in `ValueError` as `PyValueError`;
        // `pyo3::exceptions::ValueError` is not a type in current releases.
        .map_err(|e| {
            pyo3::exceptions::PyValueError::new_err(format!(
                "Parse error: {} at line {}",
                e.message, e.line
            ))
        })
}
ValueError, OSError, RuntimeError)
__init__.py inheriting from kreuzberg.KreuzbergError
# Python error handling
class KreuzbergError(Exception):
"""Base exception for Kreuzberg errors"""
def __init__(self, message: str, code: int | None = None, context: dict | None = None):
self.message = message
self.code = code
self.context = context or {}
super().__init__(message)
class ParseError(KreuzbergError):
"""Parse error with line/column info"""
def __init__(self, message: str, line: int, column: int):
self.line = line
self.column = column
super().__init__(f"{message} at line {line}:{column}", code=1001)
# Usage
# Call the binding and handle the structured parse failure it may raise.
try:
result = kreuzberg.parse_document(data)
except ParseError as e:
# line, column and message are read from the caught ParseError instance.
print(f"Error at {e.line}:{e.column}: {e.message}")
# Log context for debugging: the error's context dict is passed as `extra`.
# NOTE(review): `logger` is not defined in this excerpt; confirm it is set up by the caller.
logger.error("Parse failed", extra=e.context)
// TypeScript error handling
/**
 * Base error for Kreuzberg failures.
 *
 * Carries a numeric `code` and an optional `context` record alongside the
 * standard `Error` message.
 */
class KreuzbergError extends Error {
  public code: number;
  public context?: Record<string, unknown>;

  constructor(message: string, code: number, context?: Record<string, unknown>) {
    super(message);
    this.code = code;
    this.context = context;
    this.name = "KreuzbergError";
  }
}
/**
 * Parse failure at a source position; always reported with code 1001.
 */
class ParseError extends KreuzbergError {
  public line: number;
  public column: number;

  constructor(message: string, line: number, column: number) {
    // The position is also mirrored into the base-class context record.
    super(message, 1001, { line, column });
    this.line = line;
    this.column = column;
    this.name = "ParseError";
  }
}
// Promise rejection
/**
 * Promise wrapper around the callback-style `kreuzberg.parseDocument`.
 *
 * @param data Raw document bytes.
 * @returns The parsed document.
 * @throws KreuzbergError (or a subclass such as ParseError) when the native
 *   call reports a failure or yields no document.
 */
async function parseDocument(data: Uint8Array): Promise<Document> {
  return new Promise<Document>((resolve, reject) => {
    kreuzberg.parseDocument(data, (err: Error | null, doc?: Document) => {
      if (err) {
        // Typed errors pass through untouched so their code, context, and
        // stack survive; re-creating them would discard all three.
        // Anything else is wrapped, keeping the original as `context.cause`.
        reject(
          err instanceof KreuzbergError
            ? err
            : new KreuzbergError(err.message, 500, { cause: err })
        );
        return;
      }
      if (doc === undefined) {
        // The callback contract allows `doc` to be absent; surface that as an
        // error instead of resolving the promise with `undefined`.
        reject(new KreuzbergError("parseDocument returned neither an error nor a document", 500));
        return;
      }
      resolve(doc);
    });
  });
}
// Async/await with error handling
// Await the promise-based wrapper and branch on the error class.
try {
const doc = await parseDocument(data);
} catch (e) {
// ParseError extends KreuzbergError, so the more specific class is tested first.
if (e instanceof ParseError) {
console.error(`Parse error at ${e.line}:${e.column}: ${e.message}`);
} else if (e instanceof KreuzbergError) {
console.error(`Kreuzberg error [${e.code}]: ${e.message}`);
} else {
// Anything that is not a Kreuzberg error is logged as-is.
console.error("Unknown error:", e);
}
}
# Ruby error handling
module Kreuzberg
  # Base class for Kreuzberg errors.
  class Error < StandardError
    attr_reader :code, :context

    # message - description handed to StandardError
    # code    - optional numeric error code (nil when omitted)
    # context - optional hash of diagnostic details ({} when omitted)
    def initialize(message, code: nil, context: {})
      super(message)
      @code = code
      @context = context
    end
  end

  # Parse failure that records the line and column where it occurred.
  class ParseError < Error
    attr_reader :line, :column

    # The position is appended to the message and mirrored into the context hash.
    def initialize(message, line:, column:)
      position = { line: line, column: column }
      super("#{message} at #{line}:#{column}", code: 1001, context: position)
      @line = line
      @column = column
    end
  end
end
# Usage
# Rescue clauses run from most specific to most general:
# ParseError < Error < StandardError.
begin
doc = Kreuzberg.parse_document(data)
rescue Kreuzberg::ParseError => e
puts "Parse error at #{e.line}:#{e.column}: #{e.message}"
# Handle parse error specifically
rescue Kreuzberg::Error => e
puts "Kreuzberg error [#{e.code}]: #{e.message}"
# Handle other Kreuzberg errors
rescue StandardError => e
puts "Unexpected error: #{e.message}"
# A bare `raise` re-raises the exception currently being handled.
raise # Re-raise if unexpected
ensure
# Cleanup always happens
end
fmt.Errorf("%w", err) to preserve error chain
var ErrParse = errors.New("parse error"))
// Go error handling
package kreuzberg
import (
"errors"
"fmt"
)
var (
ErrParse = errors.New("parse error")
ErrInvalidInput = errors.New("invalid input")
)
type ParseError struct {
Err error
Line int
Column int
}
func (e *ParseError) Error() string {
return fmt.Sprintf("parse error at %d:%d: %v", e.Line, e.Column, e.Err)
}
func (e *ParseError) Unwrap() error {
return e.Err
}
// Parse function
// ParseDocument validates data and then parses it into a Document.
//
// A validation failure is returned wrapped with %w so the original error stays
// in the chain. A parsing failure is returned as a *ParseError holding the
// cause plus the values reported by getLine and getColumn.
func ParseDocument(data []byte) (*Document, error) {
	// ... parsing logic
	if validationErr := validateData(data); validationErr != nil {
		return nil, fmt.Errorf("%w", validationErr) // Preserve chain
	}

	parsed, parseErr := parseBytes(data)
	if parseErr == nil {
		return parsed, nil
	}

	return nil, &ParseError{
		Err:    parseErr,
		Line:   getLine(),
		Column: getColumn(),
	}
}
// Caller
// Parse, then inspect a failure both by sentinel and by concrete type.
doc, err := kreuzberg.ParseDocument(data)
if err != nil {
// errors.Is matches when ErrParse appears anywhere in err's chain.
if errors.Is(err, kreuzberg.ErrParse) {
fmt.Printf("Got parse error\n")
}
// errors.As finds the first *ParseError in the chain so its position can be read.
var parseErr *kreuzberg.ParseError
if errors.As(err, &parseErr) {
fmt.Printf("Parse error at %d:%d\n", parseErr.Line, parseErr.Column)
}
// The error is still returned to the caller after reporting.
return err
}
initCause() to preserve error chain
// Java error handling
/**
 * Base checked exception for Kreuzberg failures.
 *
 * <p>Carries a numeric error code and a mutable context map that starts empty;
 * callers and subclasses add entries through {@link #getContext()}.
 */
public class KreuzbergException extends Exception {
    private final int code;
    private final Map<String, Object> context = new HashMap<>();

    /**
     * @param message description of the failure
     * @param code numeric error code
     */
    public KreuzbergException(String message, int code) {
        super(message);
        this.code = code;
    }

    /**
     * @param message description of the failure
     * @param cause underlying exception kept in the cause chain
     * @param code numeric error code
     */
    public KreuzbergException(String message, Throwable cause, int code) {
        super(message, cause);
        this.code = code;
    }

    /** @return the numeric error code */
    public int getCode() {
        return this.code;
    }

    /** @return the live context map, not a copy */
    public Map<String, Object> getContext() {
        return this.context;
    }
}
/**
 * Parse failure with the line and column where it occurred.
 *
 * <p>Always uses error code 1001. The position is appended to the message and
 * also stored in the context map under the keys "line" and "column".
 */
public class ParseException extends KreuzbergException {
    private final int line;
    private final int column;

    /**
     * @param message description of what failed to parse
     * @param line line of the failure
     * @param column column of the failure
     */
    public ParseException(String message, int line, int column) {
        super(String.format("%s at %d:%d", message, line, column), 1001);
        this.line = line;
        this.column = column;
        recordPosition();
    }

    /**
     * Variant that keeps the underlying exception in the cause chain, so a
     * lower-level failure is not lost when it is translated to a parse error.
     *
     * @param message description of what failed to parse
     * @param line line of the failure
     * @param column column of the failure
     * @param cause underlying exception
     */
    public ParseException(String message, int line, int column, Throwable cause) {
        super(String.format("%s at %d:%d", message, line, column), cause, 1001);
        this.line = line;
        this.column = column;
        recordPosition();
    }

    /** Copies the position into the inherited context map. */
    private void recordPosition() {
        getContext().put("line", line);
        getContext().put("column", column);
    }

    /** @return line of the failure */
    public int getLine() { return line; }

    /** @return column of the failure */
    public int getColumn() { return column; }
}
// Usage
// Catch clauses run from most specific to most general:
// ParseException extends KreuzbergException, which extends Exception.
try {
Document doc = Kreuzberg.parseDocument(data);
} catch (ParseException e) {
System.err.printf("Parse error at %d:%d: %s%n", e.getLine(), e.getColumn(), e.getMessage());
// Handle parse error
} catch (KreuzbergException e) {
System.err.printf("Kreuzberg error [%d]: %s%n", e.getCode(), e.getMessage());
// Handle other errors
} catch (Exception e) {
System.err.println("Unexpected error: " + e.getMessage());
// Anything unexpected is logged and then rethrown to the caller.
throw e;
}
from, Go: Errorf "%w", Java: initCause)
// Rust error with full context
use anyhow::{Context, Result, anyhow};
/// Parses UTF-8 encoded bytes into a `Document`, attaching context to failures.
///
/// # Errors
///
/// Returns an error with the context "Input must be valid UTF-8" when `data`
/// is not valid UTF-8, or with "Failed to parse N bytes" when `parse_impl`
/// rejects the decoded text. The underlying error is kept as the cause.
pub fn parse_document(data: &[u8]) -> Result<Document> {
    // `String::from_utf8` takes an owned `Vec<u8>` and does not accept a
    // slice; `std::str::from_utf8` validates the borrowed bytes in place.
    // NOTE(review): assumes `parse_impl` accepts `&str` — confirm.
    let content = std::str::from_utf8(data).context("Input must be valid UTF-8")?;
    let doc = parse_impl(content)
        .with_context(|| format!("Failed to parse {} bytes", content.len()))?;
    Ok(doc)
}
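// Error includes: message, cause, context
// Result propagates with full chain. The two failure paths are mutually
// exclusive (invalid UTF-8 returns before parsing starts), so a single error
// shows one of these chains, never both:
//
// Parse failure:
// Error: Failed to parse 1024 bytes
// Caused by:
// Expected 'tag' at line 5, column 10
//
// Invalid input encoding:
// Error: Input must be valid UTF-8
// Caused by:
// invalid utf-8 sequence of 1 bytes from index 3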
tools
Convert HTML to Markdown, Djot, or plain text with structured extraction. Use when writing code that calls html-to-markdown APIs in Rust, Python, TypeScript, Go, Ruby, PHP, Java, C#, Elixir, R, C, or WASM. Covers installation, conversion, configuration, metadata extraction, document structure, and CLI usage.
development
Developer quick start guide with prerequisites, setup, and workflow commands
development
Common task runner commands for build, test, lint, and format workflows
tools
______________________________________________________________________ ## priority: high # Workspace Structure & Project Organization **Rust workspace** (Cargo.toml): crates/{kreuzberg,kreuzberg-py,kreuzberg-node,kreuzberg-ffi,kreuzberg-cli}, packages/ruby/ext/kreuzberg_rb/native, tools/{benchmark-harness,e2e-generator}, e2e/{rust,go}. **Language packages**: packages/{python,typescript,ruby,java,go} - thin wrappers around Rust core. **E2E tests**: Auto-generated from fixtures/ via tools/e2e