返回顶部
c

csv-handler" CSV文件处理

Handle CSV files from construction software exports. Auto-detect delimiters, encodings, and clean messy data."

作者: admin | 来源: ClawHub
源自
ClawHub
版本
V 2.1.0
安全检测
已通过
2,090
下载量
免费
免费
3
收藏
概述
安装方式
版本历史

csv-handler"

技能名称: csv-handler
详细描述:

建筑数据CSV处理器

概述

CSV是建筑行业通用的数据交换格式——从进度计划导出到成本数据库。本技能处理编码问题、分隔符检测和数据清洗。

Python实现

python
import pandas as pd
import csv
from typing import Dict, Any, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass
import chardet

@dataclass
class CSVProfile:
CSV文件配置文件。
encoding: str
delimiter: str
has_header: bool
row_count: int
column_count: int
columns: List[str]

class ConstructionCSVHandler:
处理来自建筑软件的CSV文件。

COMMON_DELIMITERS = [,, ;, \t, |]
COMMON_ENCODINGS = [utf-8, utf-8-sig, latin-1, cp1252, iso-8859-1]

def init(self):
self.last_profile: Optional[CSVProfile] = None

def detectencoding(self, filepath: str) -> str:
检测文件编码。
with open(file_path, rb) as f:
raw = f.read(10000)
result = chardet.detect(raw)
return result.get(encoding, utf-8) or utf-8

def detectdelimiter(self, filepath: str, encoding: str) -> str:
检测CSV分隔符。
with open(file_path, r, encoding=encoding, errors=replace) as f:
sample = f.read(5000)

# 统计出现次数
counts = {d: sample.count(d) for d in self.COMMON_DELIMITERS}

# 返回出现最频繁且一致的分隔符
if counts:
return max(counts, key=counts.get)
return ,

def profilecsv(self, filepath: str) -> CSVProfile:
分析CSV文件。
encoding = self.detectencoding(filepath)
delimiter = self.detectdelimiter(filepath, encoding)

# 读取样本
df = pd.readcsv(filepath, encoding=encoding, delimiter=delimiter,
nrows=10, onbadlines=skip)

has_header = not df.columns[0].replace(., ).replace(-, ).isdigit()

# 完整行数统计
with open(file_path, r, encoding=encoding, errors=replace) as f:
rowcount = sum(1 for in f) - (1 if has_header else 0)

profile = CSVProfile(
encoding=encoding,
delimiter=delimiter,
hasheader=hasheader,
rowcount=rowcount,
column_count=len(df.columns),
columns=list(df.columns)
)
self.last_profile = profile
return profile

def readcsv(self, filepath: str,
encoding: Optional[str] = None,
delimiter: Optional[str] = None,
clean: bool = True) -> pd.DataFrame:
自动检测并读取CSV文件。

# 若未提供则自动检测
if encoding is None:
encoding = self.detectencoding(filepath)
if delimiter is None:
delimiter = self.detectdelimiter(filepath, encoding)

# 带错误处理的读取
df = pd.read_csv(
file_path,
encoding=encoding,
delimiter=delimiter,
onbadlines=skip,
low_memory=False
)

if clean:
df = self.clean_dataframe(df)

return df

def clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
清洗建筑CSV数据。
# 清洗列名
df.columns = [self.cleancolumn_name(c) for c in df.columns]

# 移除空行和空列
df = df.dropna(how=all)
df = df.dropna(axis=1, how=all)

# 去除字符串首尾空格
for col in df.select_dtypes(include=[object]):
df[col] = df[col].str.strip() if df[col].dtype == object else df[col]

return df

def cleancolumn_name(self, name: str) -> str:
清洗列名。
if not isinstance(name, str):
return str(name)

# 移除特殊字符,替换空格
clean = name.strip().lower()
clean = clean.replace( , ).replace(-, )
clean = .join(c for c in clean if c.isalnum() or c == _)
return clean

def mergecsvs(self, filepaths: List[str],
on_column: Optional[str] = None) -> pd.DataFrame:
合并多个CSV文件。
dfs = []
for path in file_paths:
df = self.read_csv(path)
df[sourcefile] = Path(path).name
dfs.append(df)

if not dfs:
return pd.DataFrame()

if oncolumn and oncolumn in dfs[0].columns:
result = dfs[0]
for df in dfs[1:]:
result = pd.merge(result, df, on=on_column, how=outer)
return result

return pd.concat(dfs, ignore_index=True)

def split_csv(self, df: pd.DataFrame,
group_column: str,
output_dir: str) -> List[str]:
按列值拆分CSV文件。
outputpath = Path(outputdir)
outputpath.mkdir(parents=True, existok=True)

files = []
for value in df[group_column].unique():
subset = df[df[group_column] == value]
filename = f{groupcolumn}{value}.csv
filepath = output_path / filename
subset.to_csv(filepath, index=False)
files.append(str(filepath))

return files

def convert_types(self, df: pd.DataFrame,
type_map: Dict[str, str] = None) -> pd.DataFrame:
智能转换列类型。
df = df.copy()

if type_map:
for col, dtype in type_map.items():
if col in df.columns:
try:
df[col] = df[col].astype(dtype)
except:
pass
else:
# 自动转换
for col in df.columns:
# 尝试数值类型
try:
df[col] = pd.to_numeric(df[col])
continue
except:
pass

# 尝试日期时间类型
try:
df[col] = pd.to_datetime(df[col])
except:
pass

return df

def export_csv(self, df: pd.DataFrame,
file_path: str,
encoding: str = utf-8-sig,
delimiter: str = ,) -> str:
将DataFrame导出为CSV文件。
df.tocsv(filepath, encoding=encoding, sep=delimiter, index=False)
return file_path

专用处理器

class ScheduleCSVHandler(ConstructionCSVHandler): 项目进度CSV文件处理器。

SCHEDULE_COLUMNS

标签

skill ai

通过对话安装

该技能支持在以下平台通过对话安装:

OpenClaw WorkBuddy QClaw Kimi Claude

方式一:安装 SkillHub 和技能

帮我安装 SkillHub 和 csv-handler-1776345028 技能

方式二:设置 SkillHub 为优先技能安装源

设置 SkillHub 为我的优先技能安装源,然后帮我安装 csv-handler-1776345028 技能

通过命令行安装

skillhub install csv-handler-1776345028

下载

⬇ 下载 csv-handler" v2.1.0(免费)

文件大小: 4.52 KB | 发布时间: 2026-4-17 15:41

v2.1.0 最新 2026-4-17 15:41
- Added auto-detection of CSV delimiters and file encodings.
- Improved data cleaning: trims whitespace, removes empty rows/columns, and standardizes column names.
- Allows merging and splitting CSV files by column values.
- Introduced intelligent type conversion for numeric and date fields.
- Specialized classes for handling construction schedule and cost CSVs.

Archiver·手机版·闲社网·闲社论坛·羊毛社区· 多链控股集团有限公司 · 苏ICP备2025199260号-1

Powered by Discuz! X5.0   © 2024-2025 闲社网·线报更新论坛·羊毛分享社区·http://xianshe.com

p2p_official_large
返回顶部