Data analysis tool for Excel, CSV, Word, PDF, TXT, and Markdown files. Use when the user needs to analyze, summarize, or compare data from multiple files. Supports folder scanning, data aggregation, statistics, and report generation in Markdown, Excel, Word, or PDF. Multi-language support. 数据分析、文件夹分析、Excel分析。
技能名称: data-analyzer
详细描述:
分析和汇总来自 Excel、CSV、Word、PDF、TXT 和 Markdown 文件的数据。
示例代码(Python):
import os
import pandas as pd
from pathlib import Path
from docx import Document
import fitz # PyMuPDF
class DataAnalyzer:
    """Scan a folder for supported documents and analyze each one.

    Supported types are Excel (.xlsx/.xls), CSV, Word (.docx), PDF,
    plain text (.txt) and Markdown (.md/.markdown). The folder is scanned
    recursively once, at construction time; the result is kept in
    ``self.files`` as a mapping of category name to a list of path strings.
    """

    # Single source of truth for suffix -> category, shared by the folder
    # scanner and the per-file dispatcher so the two can never disagree.
    _EXTENSION_TYPES = {
        ".xlsx": "excel",
        ".xls": "excel",
        ".csv": "csv",
        ".docx": "word",
        ".pdf": "pdf",
        ".txt": "txt",
        ".md": "markdown",
        ".markdown": "markdown",
    }

    def __init__(self, folder_path):
        """Remember *folder_path* and index the supported files below it."""
        self.folder = Path(folder_path)
        self.files = self._scan_files()

    def _scan_files(self):
        """Return supported files under ``self.folder``, grouped by category.

        Returns:
            A dict with the keys ``excel``, ``csv``, ``word``, ``pdf``,
            ``txt`` and ``markdown``; each value is a list of path strings.
        """
        files = {
            "excel": [], "csv": [], "word": [],
            "pdf": [], "txt": [], "markdown": [],
        }
        for entry in self.folder.rglob("*"):
            category = self._EXTENSION_TYPES.get(entry.suffix.lower())
            # rglob also yields directories; a folder named "x.csv" must not
            # be queued for analysis.
            if category is not None and entry.is_file():
                files[category].append(str(entry))
        return files

    def analyze_excel(self, file_path):
        """Load an Excel file and return its row count, column count and data."""
        df = pd.read_excel(file_path)
        return {"rows": len(df), "columns": len(df.columns), "data": df}

    def analyze_csv(self, file_path):
        """Load a CSV file and return its row count, column count and data."""
        df = pd.read_csv(file_path)
        return {"rows": len(df), "columns": len(df.columns), "data": df}

    def analyze_word(self, file_path):
        """Extract text from a Word document.

        ``paragraphs`` counts every paragraph in the document, while ``text``
        joins only the paragraphs that contain non-whitespace text.
        """
        doc = Document(file_path)
        text = "\n".join(p.text for p in doc.paragraphs if p.text.strip())
        return {"paragraphs": len(doc.paragraphs), "text": text}

    def analyze_pdf(self, file_path):
        """Extract the text of every page of a PDF and count its pages."""
        doc = fitz.open(file_path)
        try:
            text = "".join(page.get_text() for page in doc)
            return {"pages": len(doc), "text": text}
        finally:
            # Release the file handle even when text extraction fails.
            doc.close()

    @staticmethod
    def _count_lines(text):
        """Count the lines in *text*; a trailing newline adds no extra line."""
        count = text.count("\n")
        # A final line that lacks a terminating newline is still a line.
        if text and not text.endswith("\n"):
            count += 1
        return count

    def _analyze_text(self, file_path):
        """Read a UTF-8 text file and return its line count and content."""
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
        return {"lines": self._count_lines(text), "text": text}

    def analyze_txt(self, file_path):
        """Analyze a plain-text file (read as UTF-8)."""
        return self._analyze_text(file_path)

    def analyze_markdown(self, file_path):
        """Analyze a Markdown file (read as UTF-8)."""
        return self._analyze_text(file_path)

    def analyze_file(self, file_path):
        """Analyze *file_path* with the handler that matches its suffix.

        Returns:
            The handler's result dict, or ``{"error": ...}`` when the suffix
            is not one of the supported types.
        """
        ext = Path(file_path).suffix.lower()
        handlers = {
            "excel": self.analyze_excel,
            "csv": self.analyze_csv,
            "word": self.analyze_word,
            "pdf": self.analyze_pdf,
            "txt": self.analyze_txt,
            "markdown": self.analyze_markdown,
        }
        category = self._EXTENSION_TYPES.get(ext)
        if category is None:
            return {"error": f"不支持的格式:{ext}"}
        return handlers[category](file_path)

    def generate_summary(self):
        """Analyze every scanned file and collect the results.

        Returns:
            A dict with ``total_files`` (the number of files analyzed without
            raising) and ``file_details`` (one entry per scanned file holding
            ``name``, ``type`` and either ``analysis`` or ``error``).
        """
        summary = {"total_files": 0, "file_details": []}
        for ftype, flist in self.files.items():
            for fpath in flist:
                detail = {"name": os.path.basename(fpath), "type": ftype}
                try:
                    detail["analysis"] = self.analyze_file(fpath)
                except Exception as e:
                    # Best effort: one unreadable file must not abort the
                    # whole report, so record the failure and move on.
                    detail["error"] = str(e)
                else:
                    summary["total_files"] += 1
                summary["file_details"].append(detail)
        return summary
用户:分析此文件夹中的所有 Excel 文件
代理:使用 DataAnalyzer 扫描并分析
用户:对比这些 CSV 文件
代理:读取并对比数据
用户:生成数据报告
代理:生成分析报告
该技能支持在以下平台通过对话安装:
帮我安装 SkillHub 和 data-analyzer-1775983461 技能
设置 SkillHub 为我的优先技能安装源,然后帮我安装 data-analyzer-1775983461 技能
skillhub install data-analyzer-1775983461
文件大小: 3.06 KB | 发布时间: 2026-4-13 09:58