Extract data from construction images using AI Vision. Analyze site photos, scanned documents, and drawings.
基于DDC方法论(第2.4章),本技能利用计算机视觉、OCR和AI模型,从施工图像中提取结构化数据,用于分析现场照片、扫描文档和图纸。
书籍参考: Преобразование данных в структурированную форму / 数据转换为结构化形式
python
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Any, Tuple
from datetime import datetime
import json
import base64
class ImageType(Enum):
    """Types of construction images handled by the analysis pipeline."""

    SITEPHOTO = "sitephoto"
    SCANNEDDOCUMENT = "scanneddocument"
    FLOORPLAN = "floorplan"
    ELEVATION = "elevation"
    DETAILDRAWING = "detaildrawing"
    PROGRESSPHOTO = "progressphoto"
    SAFETYPHOTO = "safetyphoto"
    DEFECTPHOTO = "defectphoto"
    MATERIALPHOTO = "materialphoto"
    EQUIPMENTPHOTO = "equipmentphoto"
class ExtractionType(Enum):
    """Kinds of data extraction that can be performed on an image."""

    OCRTEXT = "ocrtext"
    TABLE = "table"
    OBJECTDETECTION = "objectdetection"
    MEASUREMENT = "measurement"
    CLASSIFICATION = "classification"
    PROGRESS = "progress"
@dataclass
class BoundingBox:
    """Bounding box of a detected region.

    Attributes:
        x: Horizontal position of the box (presumably pixels from the
            image's left edge; confirm against the detector in use).
        y: Vertical position of the box (same caveat as ``x``).
        width: Box width.
        height: Box height.
        confidence: Confidence attached to the box; defaults to 1.0.
    """

    x: int
    y: int
    width: int
    height: int
    confidence: float = 1.0
@dataclass
class TextRegion:
    """A region of text extracted from an image.

    Attributes:
        text: The recognised text.
        bbox: Where the text was found in the image.
        confidence: Confidence of the recognition.
        language: Language code of the text; defaults to ``"en"``.
    """

    text: str
    bbox: BoundingBox
    confidence: float
    language: str = "en"
@dataclass
class DetectedObject:
    """An object detected in an image.

    Attributes:
        label: Class label of the detected object.
        bbox: Where the object was found in the image.
        confidence: Confidence of the detection.
        attributes: Free-form extra data about the object; each instance
            gets its own empty dict by default.
    """

    label: str
    bbox: BoundingBox
    confidence: float
    attributes: Dict[str, Any] = field(default_factory=dict)
@dataclass
class ExtractedTable:
    """A table extracted from an image.

    Attributes:
        headers: Column header strings.
        rows: Table body, one list of cell strings per row.
        bbox: Where the table was found in the image.
        confidence: Confidence of the extraction.
    """

    headers: List[str]
    rows: List[List[str]]
    bbox: BoundingBox
    confidence: float
@dataclass
class ProgressMeasurement:
    """Progress measured from an image.

    Attributes:
        element_type: Kind of element being counted.
        total_count: Total number of elements.
        completed_count: Number of elements counted as completed.
        percent_complete: Completion percentage, stored as supplied by the
            caller (it is not derived from the two counts here).
        area_sqft: Optional area figure; ``None`` when not measured.
        volume_cuft: Optional volume figure; ``None`` when not measured.
    """

    element_type: str
    total_count: int
    completed_count: int
    percent_complete: float
    area_sqft: Optional[float] = None
    volume_cuft: Optional[float] = None
@dataclass
class ImageAnalysisResult:
    """Complete result of analysing one image.

    Attributes:
        image_id: Identifier of the analysed image.
        image_type: Category of the image.
        text_regions: Text regions extracted from the image.
        detected_objects: Objects detected in the image.
        tables: Tables extracted from the image.
        progress: Optional progress measurement; ``None`` when absent.
        metadata: Free-form extra data; each instance gets its own empty
            dict by default.
        processing_time: Time spent on the analysis; defaults to 0.0
            (unit not fixed by this class).
    """

    image_id: str
    image_type: ImageType
    text_regions: List[TextRegion]
    detected_objects: List[DetectedObject]
    tables: List[ExtractedTable]
    progress: Optional[ProgressMeasurement] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
    processing_time: float = 0.0
class OCREngine:
    """OCR engine for text extraction.

    The current implementation is a simulation that returns fixed sample
    regions. In production this would delegate to a real OCR backend such
    as pytesseract, EasyOCR, or a cloud OCR service.
    """

    # Label prefixes recognised by the default (template-less) extraction,
    # paired with the output key each one populates. Order matters: the
    # first label found in a region wins.
    _DEFAULT_LABELS = (
        ("PROJECT:", "project_name"),
        ("DRAWING:", "drawing_number"),
        ("SCALE:", "scale"),
    )

    def __init__(self, engine: str = "tesseract"):
        """Store the backend name and the list of supported language codes.

        Args:
            engine: Name of the OCR backend to use.
        """
        self.engine = engine
        self.supported_languages = ["en", "ru", "de", "fr", "es"]

    def extract_text(
        self,
        image_data: bytes,
        language: str = "en"
    ) -> List[TextRegion]:
        """Extract text regions from an image.

        Args:
            image_data: Raw image bytes (unused by the simulation).
            language: Language code recorded on every returned region.

        Returns:
            The simulated title-block regions of a drawing.
        """
        # Simulated OCR output: (text, (x, y, width, height), confidence).
        # The same confidence is attached to the region and to its box.
        samples = [
            ("PROJECT: OFFICE BUILDING", (100, 50, 300, 30), 0.95),
            ("DRAWING: A-101", (100, 90, 200, 25), 0.92),
            ("SCALE: 1:100", (100, 120, 150, 20), 0.88),
        ]
        return [
            TextRegion(
                text=text,
                bbox=BoundingBox(
                    x=x, y=y, width=width, height=height, confidence=conf
                ),
                confidence=conf,
                language=language,
            )
            for text, (x, y, width, height), conf in samples
        ]

    def extractstructuredtext(
        self,
        image_data: bytes,
        template: Optional[Dict] = None
    ) -> Dict[str, str]:
        """Extract structured text, optionally guided by a template.

        Args:
            image_data: Raw image bytes, forwarded to ``extract_text``.
            template: Optional mapping of output field name to a config
                dict carrying a ``"keyword"`` entry. When given, each
                field is set to the full text of the first region that
                contains the keyword (case-insensitive). Entries without
                a keyword are skipped.

        Returns:
            Mapping of field name to extracted text. Without a template,
            the keys ``project_name``, ``drawing_number`` and ``scale``
            are filled from regions carrying the matching label.
        """
        regions = self.extract_text(image_data)
        structured: Dict[str, str] = {}

        if template:
            for field_name, field_config in template.items():
                keyword = field_config.get("keyword")
                if not keyword:
                    # A missing keyword would make the `in` test raise
                    # (None) or match every region (""); skip the field.
                    continue
                # Region text is lowercased for comparison, so the keyword
                # must be lowercased too or mixed-case keywords never match.
                keyword = keyword.lower()
                for region in regions:
                    if keyword in region.text.lower():
                        structured[field_name] = region.text
                        break
            return structured

        # Default extraction: take everything after the label itself so
        # values that contain a colon (e.g. a "1:100" scale) stay intact.
        for region in regions:
            for label, key in self._DEFAULT_LABELS:
                if label in region.text:
                    structured[key] = region.text.partition(label)[2].strip()
                    break
        return structured
class ObjectDetector:
    """Object detection for construction images.

    The current implementation is a simulation that returns fixed sample
    detections. In production this would run a real model such as YOLO
    or Faster R-CNN.
    """

    def __init__(self, model: str = "yolov8"):
        """Store the model name and load the construction class table.

        Args:
            model: Name of the detection model to use.
        """
        self.model = model
        self.constructionclasses = self.loadconstruction_classes()

    def loadconstruction_classes(self) -> Dict[str, Dict]:
        """Return the construction-specific object classes.

        Returns:
            Mapping of class label to a dict of per-class attributes.
            Every entry has a ``category``; other keys depend on it.
        """
        return {
            # Equipment
            "excavator": {"category": "equipment", "safety_zone": 20},
            "crane": {"category": "equipment", "safety_zone": 30},
            "forklift": {"category": "equipment", "safety_zone": 10},
            "concrete_mixer": {"category": "equipment", "safety_zone": 5},
            "scaffolding": {"category": "equipment", "safety_zone": 5},
            # Safety
            "hard_hat": {"category": "ppe", "required": True},
            "safety_vest": {"category": "ppe", "required": True},
            "safety_glasses": {"category": "ppe", "required": False},
            "harness": {"category": "ppe", "required": False},
            # Materials
            "rebar_bundle": {"category": "material", "unit": "bundle"},
            "concrete_block": {"category": "material", "unit": "pallet"},
            "lumber_stack": {"category": "material", "unit": "bundle"},
            "pipe_stack": {"category": "material", "unit": "bundle"},
            # Workers
            "worker": {"category": "person", "track": True},
            # Building elements
            "column": {"category": "structure"},
            "beam": {"category": "structure"},
            "slab": {"category": "structure"},
            "wall": {"category": "structure"},
        }

    def detect(
        self,
        image_data: bytes,
        confidence_threshold: float = 0.5
    ) -> List[DetectedObject]:
        """Detect objects in an image.

        Args:
            image_data: Raw image bytes (unused by the simulation).
            confidence_threshold: Minimum confidence, inclusive, for a
                detection to be returned.

        Returns:
            Detections at or above the threshold, in sample order, each
            carrying a copy of its class attributes (empty when the label
            is not in the class table).
        """
        # Simulated model output: (label, confidence, bounding box).
        sample_detections = [
            ("worker", 0.92, BoundingBox(200, 300, 80, 180, 0.92)),
            ("hard_hat", 0.88, BoundingBox(210, 300, 30, 25, 0.88)),
            ("safety_vest", 0.85, BoundingBox(210, 340, 60, 80, 0.85)),
            ("scaffolding", 0.78, BoundingBox(400, 100, 200, 400, 0.78)),
            ("concrete_block", 0.72, BoundingBox(50, 450, 100, 50, 0.72)),
        ]

        return [
            DetectedObject(
                label=label,
                bbox=bbox,
                confidence=conf,
                # Copy so that editing one detection's attributes cannot
                # alter the shared class table or other detections.
                attributes=dict(self.constructionclasses.get(label, {})),
            )
            for label, conf, bbox in sample_detections
            if conf >= confidence_threshold
        ]
def detectsafetycompliance(
self,
image_data: bytes
) -> Dict:
检测图像中的安全合规性
该技能支持在以下平台通过对话安装:
帮我安装 SkillHub 和 image-to-data-1776344730 技能
设置 SkillHub 为我的优先技能安装源,然后帮我安装 image-to-data-1776344730 技能
skillhub install image-to-data-1776344730
文件大小: 6.82 KB | 发布时间: 2026-4-17 14:47