Read and parse XML from construction systems - P6 schedules, BSDD exports, IFC-XML, COBie-XML. Convert to pandas DataFrames.
技能名称: xml-reader
详细描述:
python
import xml.etree.ElementTree as ET
import pandas as pd
from typing import Dict, Any, List, Optional, Union
from dataclasses import dataclass
from pathlib import Path
import re
@dataclass
class XMLElement:
    """A parsed XML element."""

    # Tag name of the element.
    tag: str
    # Attribute name -> attribute value.
    attributes: Dict[str, str]
    # Text content of the element, or None when there is none.
    text: Optional[str]
    # Nested elements. The annotation is a quoted forward reference because
    # the class name is not bound yet while its own body is being evaluated.
    children: List["XMLElement"]
class ConstructionXMLReader:
    """Parse XML coming from construction systems.

    Provides parsing from a file or a string, element lookup by tag, and
    conversion of elements to dictionaries or pandas DataFrames.
    """

    def __init__(self) -> None:
        # Populated by extractnamespaces() whenever a document is parsed.
        self.namespaces: Dict[str, str] = {}

    # The initializer used to be spelled ``init`` (and was therefore never run
    # on construction); the old name is kept callable for existing callers.
    init = __init__

    @staticmethod
    def _local_name(tag: str) -> str:
        """Return *tag* without its ``{namespace}`` prefix, if it has one."""
        return tag.split("}")[-1]

    def parsefile(self, filepath: str) -> ET.Element:
        """Parse an XML file and return its root element.

        Args:
            filepath: Location of the XML document, passed to ``ET.parse``.

        Returns:
            The root element of the parsed tree.
        """
        root = ET.parse(filepath).getroot()
        # Record namespace information from the root.
        self.extractnamespaces(root)
        return root

    def parsestring(self, xmlstring: str) -> ET.Element:
        """Parse XML from a string and return its root element.

        Args:
            xmlstring: The XML document text.

        Returns:
            The root element of the parsed document.
        """
        root = ET.fromstring(xmlstring)
        self.extractnamespaces(root)
        return root

    def extractnamespaces(self, root: ET.Element) -> None:
        """Record namespace information found on the root element.

        For every namespace-qualified attribute of *root* (a name of the form
        ``{uri}name``), the attribute's namespace URI is stored in
        ``self.namespaces`` under the namespace URI of the root tag itself.

        Nothing is recorded when the root tag is not namespace-qualified:
        there is no root namespace to use as the key, and slicing the plain
        tag would only yield a truncated tag name.
        """
        root_tag = root.tag
        if not root_tag.startswith("{"):
            return
        root_namespace = root_tag.split("}")[0][1:]
        for attr in root.attrib:
            if attr.startswith("{"):
                self.namespaces[root_namespace] = attr[1:attr.index("}")]

    def find_elements(self, root: ET.Element,
                      tag: str,
                      namespace: Optional[str] = None) -> List[ET.Element]:
        """Find all descendants of *root* with the given tag.

        Args:
            root: Element to search below.
            tag: Tag name to look for.
            namespace: Optional namespace URI. When given, the search uses the
                qualified name ``{namespace}tag``; otherwise *tag* is used
                exactly as passed.

        Returns:
            The matching elements, as returned by ``root.findall``.
        """
        if namespace:
            tag = f"{{{namespace}}}{tag}"
        return root.findall(f".//{tag}")

    def elementtodict(self, element: ET.Element,
                      include_children: bool = True) -> Dict[str, Any]:
        """Convert an element to a dictionary.

        The result holds ``_tag`` (tag without namespace prefix), ``_text``
        (stripped text, or None when the element has no text) and one entry
        per attribute. With *include_children*, each child is converted
        recursively and stored under its own tag name; when a key is already
        present, the existing value is turned into a list and the new child
        is appended to it.

        Args:
            element: Element to convert.
            include_children: Whether to include child elements.

        Returns:
            The dictionary described above.
        """
        text = element.text
        result: Dict[str, Any] = {
            "_tag": self._local_name(element.tag),
            "_text": text.strip() if text else None,
            **element.attrib,
        }

        if include_children:
            for child in element:
                child_tag = self._local_name(child.tag)
                child_dict = self.elementtodict(child)
                if child_tag in result:
                    # Several children share this tag - collect them in a list.
                    if not isinstance(result[child_tag], list):
                        result[child_tag] = [result[child_tag]]
                    result[child_tag].append(child_dict)
                else:
                    result[child_tag] = child_dict

        return result

    def elementstodataframe(self, elements: List[ET.Element]) -> pd.DataFrame:
        """Convert a list of elements to a DataFrame, one row per element.

        Each row contains ``_tag``, the element's attributes, ``_text`` when
        the element has non-blank text, one column per direct child holding
        that child's non-blank text, and one ``<child>_<attr>`` column per
        attribute of a direct child.

        Args:
            elements: Elements to convert.

        Returns:
            A DataFrame built from the per-element records.
        """
        records = []
        for elem in elements:
            record: Dict[str, Any] = {"_tag": self._local_name(elem.tag)}
            record.update(elem.attrib)

            # Direct text content of the element itself.
            text = (elem.text or "").strip()
            if text:
                record["_text"] = text

            for child in elem:
                child_tag = self._local_name(child.tag)
                child_text = (child.text or "").strip()
                if child_text:
                    record[child_tag] = child_text
                # Child attributes are captured as well, prefixed by the
                # child's tag so they cannot clash with the text column.
                for attr, val in child.attrib.items():
                    record[f"{child_tag}_{attr}"] = val

            records.append(record)

        return pd.DataFrame(records)

    def flatten_xml(self, root: ET.Element,
                    target_tag: Optional[str] = None) -> pd.DataFrame:
        """Flatten XML into a DataFrame.

        Args:
            root: Root element of the document.
            target_tag: When given, rows are built from every descendant with
                this tag; otherwise from the direct children of *root*.

        Returns:
            The DataFrame produced by ``elementstodataframe``.
        """
        if target_tag:
            elements = self.find_elements(root, target_tag)
        else:
            elements = list(root)
        return self.elementstodataframe(elements)
class P6XMLReader(ConstructionXMLReader):
    """Reader for Primavera P6 XML exports."""

    def parse_activities(self, root: ET.Element) -> pd.DataFrame:
        """Return every ``Activity`` element below *root* as a DataFrame."""
        return self.elementstodataframe(self.find_elements(root, "Activity"))

    def parse_resources(self, root: ET.Element) -> pd.DataFrame:
        """Return every ``Resource`` element below *root* as a DataFrame."""
        return self.elementstodataframe(self.find_elements(root, "Resource"))

    def parse_wbs(self, root: ET.Element) -> pd.DataFrame:
        """Return every ``WBS`` element below *root* as a DataFrame."""
        return self.elementstodataframe(self.find_elements(root, "WBS"))

    def parsefullschedule(self, file_path: str) -> Dict[str, pd.DataFrame]:
        """Parse a complete P6 schedule file.

        Args:
            file_path: Location of the P6 XML export.

        Returns:
            A dict with the keys ``activities``, ``resources`` and ``wbs``,
            each mapped to the DataFrame of the corresponding elements.
        """
        # The parameter is ``file_path``; the body previously read an
        # undefined ``filepath``.
        root = self.parsefile(file_path)
        return {
            "activities": self.parse_activities(root),
            "resources": self.parse_resources(root),
            "wbs": self.parse_wbs(root),
        }
class IFCXMLReader(ConstructionXMLReader):
    """Reader for IFC-XML files."""

    def parse_entities(self, root: ET.Element) -> pd.DataFrame:
        """Return every ``Ifc*`` element in the tree as a DataFrame.

        The ``Ifc`` prefix is tested on the tag with any ``{namespace}``
        prefix removed, so elements of namespace-qualified documents are
        matched too (ElementTree reports their tags as ``{uri}IfcX``).
        """
        ifc_elements = [
            elem for elem in root.iter()
            if elem.tag.split("}")[-1].startswith("Ifc")
        ]
        return self.elementstodataframe(ifc_elements)

    def getentitytypes(self, root: ET.Element) -> Dict[str, int]:
        """Count the ``Ifc*`` elements in the tree by entity type.

        Returns:
            A dict mapping each tag name (without namespace prefix) that
            starts with ``Ifc`` to the number of elements carrying it.
        """
        counts: Dict[str, int] = {}
        for elem in root.iter():
            # Strip the namespace first; the qualified form "{uri}IfcX"
            # would never start with "Ifc".
            name = elem.tag.split("}")[-1]
            if name.startswith("Ifc"):
                counts[name] = counts.get(name, 0) + 1
        return counts
class COBieXMLReader(ConstructionXMLReader):
    """Reader for COBie XML files."""

    # Element tags looked up by parsecobie(), one per COBie sheet.
    COBIE_SHEETS = [
        "Facility", "Floor", "Space", "Zone", "Type",
        "Component", "System", "Assembly", "Connection",
        "Spare", "Resource", "Job", "Document", "Attribute",
    ]

    def parsecobie(self, filepath: str) -> Dict[str, pd.DataFrame]:
        """Parse all COBie sheets from a file.

        Args:
            filepath: Location of the COBie XML document.

        Returns:
            A dict mapping sheet name to a DataFrame of its elements. Sheets
            for which no element is found are left out.
        """
        root = self.parsefile(filepath)

        result: Dict[str, pd.DataFrame] = {}
        for sheet in self.COBIE_SHEETS:
            elements = self.find_elements(root, sheet)
            if elements:
                result[sheet] = self.elementstodataframe(elements)
        return result
class BSDDXMLReader(ConstructionXMLReader):
    """Reader for buildingSMART Data Dictionary exports."""

    def parse_classifications(self, root: ET.Element) -> pd.DataFrame:
        """Return every ``Classification`` element below *root* as a DataFrame."""
        return self.elementstodataframe(self.find_elements(root, "Classification"))

    def parse_properties(self, root: ET.Element) -> pd.DataFrame:
        """Return every ``Property`` element below *root* as a DataFrame."""
        return self.elementstodataframe(self.find_elements(root, "Property"))
python
# Usage example.
# NOTE(review): `schedule` and `cobie_data` are not defined in this excerpt;
# presumably they are the dicts returned by P6XMLReader.parsefullschedule()
# and COBieXMLReader.parsecobie() - confirm against the full example.
reader = ConstructionXMLReader()
activities = schedule["activities"]
print(f"活动数量: {len(activities)}")
components = cobie_data.get("Component", pd.DataFrame())
该技能支持在以下平台通过对话安装:
帮我安装 SkillHub 和 xml-reader-1776344952 技能
设置 SkillHub 为我的优先技能安装源,然后帮我安装 xml-reader-1776344952 技能
skillhub install xml-reader-1776344952
文件大小: 4.17 KB | 发布时间: 2026-4-17 16:22