npx skillsauth add 305s/magicallesson layout-analyzer
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
This skill enables document layout analysis using surya - an advanced document understanding system. Detect text blocks, tables, figures, headings, and determine reading order in complex documents.
Example prompts:
from surya.detection import DetectionPredictor
from surya.layout import LayoutPredictor
from surya.reading_order import ReadingOrderPredictor
from PIL import Image
# Build the layout model first; it does not depend on the input image.
layout_predictor = LayoutPredictor()

# Open the page to analyze.
image = Image.open("document.png")

# The predictor is called with a list of images, here a single page.
layout_result = layout_predictor([image])
| Element | Description |
|---------|-------------|
| Text | Regular paragraph text |
| Title | Document/section titles |
| Section-header | Section headings |
| List-item | Bulleted/numbered items |
| Table | Tabular data |
| Figure | Images/diagrams |
| Caption | Figure/table captions |
| Footnote | Footnotes |
| Formula | Mathematical equations |
| Page-header | Headers |
| Page-footer | Footers |
from surya.detection import DetectionPredictor
from PIL import Image
# Build the text detector.
detector = DetectionPredictor()

# Open the page to analyze.
image = Image.open("document.png")

# The detector is called with a list of images, here a single page.
results = detector([image])

# Walk every detected region of every page result, in order.
detected_regions = (
    region for page_result in results for region in page_result.bboxes
)
for region in detected_regions:
    print(f"Text region: {region.bbox}")
    print(f"Confidence: {region.confidence}")
from surya.layout import LayoutPredictor
from PIL import Image
# Build the layout model.
layout_predictor = LayoutPredictor()

# Open the page and run layout analysis on a one-image batch.
image = Image.open("document.png")
layout_results = layout_predictor([image])

# Report label, box and score for every element of every page result.
detected_elements = (
    item for page_result in layout_results for item in page_result.bboxes
)
for item in detected_elements:
    print(f"Type: {item.label}")
    print(f"Bbox: {item.bbox}")
    print(f"Confidence: {item.confidence}")
from surya.reading_order import ReadingOrderPredictor
from surya.layout import LayoutPredictor
from PIL import Image
# Layout comes first: its results are passed to the reading-order predictor.
layout_predictor = LayoutPredictor()
image = Image.open("document.png")
layout_results = layout_predictor([image])

# Order the detected elements using the same image batch plus the layout.
reading_order_predictor = ReadingOrderPredictor()
order_results = reading_order_predictor([image], layout_results)

# Print a 1-based listing of the ordered elements; numbering restarts
# for each page result.
for page_result in order_results:
    ordered_elements = page_result.ordered_bboxes
    for i, element in enumerate(ordered_elements):
        print(f"{i+1}. {element.label}: {element.bbox}")
from surya.ocr import OCRPredictor
from surya.layout import LayoutPredictor
from PIL import Image
def boxes_overlap(box_a, box_b):
    """Return True when two axis-aligned boxes share a non-empty area.

    Args:
        box_a: First box as ``(x1, y1, x2, y2)``.
        box_b: Second box as ``(x1, y1, x2, y2)``.

    Returns:
        ``True`` if the rectangles intersect; boxes that only touch along
        an edge or corner do not count as overlapping.
    """
    # NOTE(review): assumes (x1, y1, x2, y2) corner order, matching how the
    # other examples index bbox[0]/bbox[2] and pass bbox to Image.crop.
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    return ax1 < bx2 and bx1 < ax2 and ay1 < by2 and by1 < ay2


# Initialize predictors
# NOTE(review): confirm that the installed surya version exposes
# OCRPredictor and a `text_lines` attribute on its results.
ocr_predictor = OCRPredictor()
layout_predictor = LayoutPredictor()

# Load image
image = Image.open("document.png")

# Get layout
layout_results = layout_predictor([image])

# Run OCR
ocr_results = ocr_predictor([image])

# Combine results: pair each page's layout with the same page's OCR output.
for layout, ocr in zip(layout_results, ocr_results):
    for layout_elem in layout.bboxes:
        print(f"Element: {layout_elem.label}")
        # Find OCR text within this layout element. A line that overlaps
        # several elements is printed under each of them.
        for text_line in ocr.text_lines:
            if boxes_overlap(layout_elem.bbox, text_line.bbox):
                print(f" Text: {text_line.text}")
from surya.layout import LayoutPredictor
from pdf2image import convert_from_path
def analyze_pdf_layout(pdf_path):
    """Run layout detection on every page of a PDF.

    Args:
        pdf_path: Path of the PDF, forwarded to ``convert_from_path``.

    Returns:
        A list with one dict per page result. Each dict has ``'page'``
        (1-based page number) and ``'elements'``, a list of dicts holding
        the ``'type'``, ``'bbox'`` and ``'confidence'`` of each element.
    """
    # Rasterize the PDF; the predictor works on page images.
    page_images = convert_from_path(pdf_path)

    predictor = LayoutPredictor()

    # All pages go through the predictor in a single call.
    return [
        {
            'page': page_number,
            'elements': [
                {
                    'type': box.label,
                    'bbox': box.bbox,
                    'confidence': box.confidence,
                }
                for box in page_layout.bboxes
            ],
        }
        for page_number, page_layout in enumerate(predictor(page_images), start=1)
    ]
# Example: collect the per-page layout description of a PDF.
structure = analyze_pdf_layout("document.pdf")
from surya.layout import LayoutPredictor
from PIL import Image, ImageDraw, ImageFont
def visualize_layout(image_path, output_path):
    """Draw the detected layout elements onto an image and save it.

    Each element gets an outline in a color chosen by its label (gray for
    labels without an entry in the color map) and a caption containing
    the label and the confidence formatted to two decimals.

    Args:
        image_path: Path of the image to analyze and annotate.
        output_path: Path the annotated image is saved to.

    Returns:
        ``output_path``, unchanged.
    """
    image = Image.open(image_path)
    layout_predictor = LayoutPredictor()
    results = layout_predictor([image])

    # Create drawing context; annotations are drawn on the opened image.
    draw = ImageDraw.Draw(image)

    # Color mapping for element types
    colors = {
        'Text': 'blue',
        'Title': 'red',
        'Table': 'green',
        'Figure': 'purple',
        'Section-header': 'orange',
        'List-item': 'cyan',
    }

    # Vertical gap, in pixels, between a caption and the top of its box.
    label_offset = 15

    # Only one image was submitted, so only the first result is used.
    for element in results[0].bboxes:
        bbox = element.bbox
        color = colors.get(element.label, 'gray')

        # Draw rectangle
        draw.rectangle(bbox, outline=color, width=2)

        # Add label above the box. Clamp to the top edge so captions of
        # elements near the top of the page are not drawn off-canvas.
        label_y = max(bbox[1] - label_offset, 0)
        draw.text((bbox[0], label_y),
                  f"{element.label} ({element.confidence:.2f})",
                  fill=color)

    image.save(output_path)
    return output_path
def extract_document_structure(image_path):
    """Group the elements of one page image into a simple document outline.

    Elements are visited in predicted reading order. The result holds the
    last ``Title`` element seen, one section per ``Section-header`` with
    the ``Text``/``List-item`` elements that follow it, and flat lists of
    tables and figures.

    Args:
        image_path: Path of the page image to analyze.

    Returns:
        A dict with keys ``'title'`` (element or ``None``), ``'sections'``
        (list of ``{'header': element, 'content': [elements]}``),
        ``'tables'`` and ``'figures'`` (lists of elements).
    """
    from surya.layout import LayoutPredictor
    from surya.reading_order import ReadingOrderPredictor

    page = Image.open(image_path)

    # Layout must be computed first; the order predictor consumes it.
    layout_model = LayoutPredictor()
    page_layout = layout_model([page])

    order_model = ReadingOrderPredictor()
    ordered_pages = order_model([page], page_layout)

    structure = {
        'title': None,
        'sections': [],
        'tables': [],
        'figures': []
    }

    # Section currently collecting body content; None until a header is seen,
    # so text that precedes the first header is not recorded.
    open_section = None

    for item in ordered_pages[0].ordered_bboxes:
        if item.label == 'Title':
            structure['title'] = item
        elif item.label == 'Section-header':
            open_section = {'header': item, 'content': []}
            structure['sections'].append(open_section)
        elif item.label == 'Table':
            structure['tables'].append(item)
        elif item.label == 'Figure':
            structure['figures'].append(item)
        elif open_section and item.label in ['Text', 'List-item']:
            open_section['content'].append(item)

    return structure
def extract_table_regions(image_path):
    """Crop every detected table out of a page image.

    Args:
        image_path: Path of the page image to analyze.

    Returns:
        A list of dicts, one per element labeled ``'Table'``, each with
        the table's ``'bbox'``, the cropped ``'image'`` and the
        detection ``'confidence'``.
    """
    from surya.layout import LayoutPredictor

    page = Image.open(image_path)
    predictor = LayoutPredictor()
    page_layouts = predictor([page])

    # Only one image was submitted, so only the first result is inspected.
    return [
        {
            'bbox': box.bbox,
            # The element's box is passed to Image.crop unchanged.
            'image': page.crop(box.bbox),
            'confidence': box.confidence,
        }
        for box in page_layouts[0].bboxes
        if box.label == 'Table'
    ]
from surya.layout import LayoutPredictor
from surya.reading_order import ReadingOrderPredictor
from pdf2image import convert_from_path
def analyze_academic_paper(pdf_path):
    """Summarize the page-by-page structure of a paper stored as a PDF.

    Args:
        pdf_path: Path of the PDF, forwarded to ``convert_from_path``.

    Returns:
        A dict with ``'pages'``, a list of ``{'page': n, 'elements': [...]}``
        entries (1-based ``n``; each element records ``'type'``, ``'bbox'``
        and ``'order'``), and ``'element_counts'``, document-wide totals
        for a fixed set of labels. Other labels are listed per page but
        not counted.
    """
    images = convert_from_path(pdf_path)

    layout_model = LayoutPredictor()
    order_model = ReadingOrderPredictor()

    # Labels tallied across the whole document, all starting at zero.
    tracked_labels = (
        'Title',
        'Section-header',
        'Text',
        'Table',
        'Figure',
        'Formula',
        'Footnote',
    )
    element_counts = dict.fromkeys(tracked_labels, 0)
    pages = []

    layout_results = layout_model(images)
    order_results = order_model(images, layout_results)

    # Layout and order results are walked in lockstep; only the ordered
    # result is read inside the loop.
    paired_results = zip(layout_results, order_results)
    for page_number, (_, ordered_page) in enumerate(paired_results, start=1):
        entries = []
        for item in ordered_page.ordered_bboxes:
            entries.append({
                'type': item.label,
                'bbox': item.bbox,
                'order': item.position
            })
            # Count element types
            if item.label in element_counts:
                element_counts[item.label] += 1
        pages.append({'page': page_number, 'elements': entries})

    return {'pages': pages, 'element_counts': element_counts}
# Example: analyze a paper and report the document-wide table/figure totals.
paper = analyze_academic_paper('research_paper.pdf')
print(f"Total tables: {paper['element_counts']['Table']}")
print(f"Total figures: {paper['element_counts']['Figure']}")
from surya.layout import LayoutPredictor
from PIL import Image
def detect_form_fields(image_path):
    """List the text elements of a form image as candidate field labels.

    Every element labeled ``'Text'`` is reported as a ``'potential_label'``.
    No check for a nearby input box or line is performed.

    Args:
        image_path: Path of the form image to analyze.

    Returns:
        A list of dicts with ``'type'`` (always ``'potential_label'``),
        ``'bbox'`` and ``'confidence'``.
    """
    form_image = Image.open(image_path)
    predictor = LayoutPredictor()
    page_layouts = predictor([form_image])

    # Only one image was submitted, so only the first result is inspected.
    return [
        {
            'type': 'potential_label',
            'bbox': box.bbox,
            'confidence': box.confidence,
        }
        for box in page_layouts[0].bboxes
        if box.label == 'Text'
    ]
# Example: count the candidate label elements found on a form image.
fields = detect_form_fields('form.png')
print(f"Found {len(fields)} potential form elements")
from surya.layout import LayoutPredictor
from surya.reading_order import ReadingOrderPredictor
from PIL import Image
def process_multicolumn_article(image_path):
    """Split the ordered elements of a page into left/right/full-width groups.

    An element wider than three quarters of the page is treated as
    full-width. Otherwise it is assigned to the left or right column
    depending on which side of the page midline its horizontal center
    falls on.

    Args:
        image_path: Path of the page image to analyze.

    Returns:
        A dict with ``'layout'`` (always ``'multi-column'``), ``'columns'``
        (dict of ``'left'``, ``'right'`` and ``'full_width'`` element lists)
        and ``'reading_order'`` (the ordered elements of the page).
    """
    page = Image.open(image_path)

    layout_model = LayoutPredictor()
    order_model = ReadingOrderPredictor()

    page_layout = layout_model([page])
    order_results = order_model([page], page_layout)

    # The page midline separates the columns; 1.5x that value (three
    # quarters of the page width) marks an element as spanning both.
    midline = page.width / 2
    full_width_cutoff = midline * 1.5

    columns = {
        'left': [],
        'right': [],
        'full_width': []
    }

    for item in order_results[0].ordered_bboxes:
        box = item.bbox
        x_start, x_end = box[0], box[2]

        if x_end - x_start > full_width_cutoff:
            bucket = 'full_width'
        elif (x_start + x_end) / 2 < midline:
            bucket = 'left'
        else:
            bucket = 'right'
        columns[bucket].append(item)

    return {
        'layout': 'multi-column',
        'columns': columns,
        'reading_order': order_results[0].ordered_bboxes
    }
# Example: report how many elements were assigned to each side column.
article = process_multicolumn_article('newspaper_page.png')
print(f"Left column: {len(article['columns']['left'])} elements")
print(f"Right column: {len(article['columns']['right'])} elements")
pip install surya-ocr
# For PDF processing
pip install pdf2image
tools
Automate customer support workflows with Zendesk ticket management, routing, and analytics
tools
Automate YouTube content workflows including video management, analytics, scheduling, and channel optimization
content-media
Create, edit, and manipulate Excel spreadsheets programmatically using openpyxl
testing
Use when creating new skills, editing existing skills, or verifying skills work before deployment