成品网站nike源码1688临海商用高端网站设计新感觉建站
2026/2/6 6:08:14 网站建设 项目流程
成品网站nike源码1688,临海商用高端网站设计新感觉建站,阿里云的网站模版好吗,徐州网站简介电子书转盲文转换器一、实际应用场景与痛点应用场景视障学生小李需要阅读教材和课外书籍。虽然市面上有少量盲文书籍，但种类有限、价格昂贵、更新缓慢。当前的数字阅读器如读屏软件虽然能朗读文本，但无法替代盲文的触觉阅读体验。盲文具有独特的优势：帮助视障人士学习正确的拼写、理解文本结构、进行深度学习。小李急需一款能将普通电子书转换为盲文格式的工具以便通过盲文点字显示器阅读或打印成盲文书籍。核心痛点1. 盲文资源匮乏市面盲文书籍种类少更新慢2. 转换工具缺失缺乏高效的中文盲文转换工具3. 格式兼容性差不同电子书格式转换困难4. 盲文印刷昂贵专用盲文打印机价格高昂5. 转换准确性低自动化转换容易出错6. 排版复杂盲文特殊的排版规则难以处理7. 多语言支持中英文混合文本处理困难二、核心逻辑设计1. 输入电子书文件txt, epub, pdf等2. 提取和清理文本内容3. 中文文本转拼音汉语盲文基于拼音4. 拼音转盲文符号按盲文规则5. 应用盲文缩写和缩略规则6. 自动分段和分页处理7. 生成盲文格式文档8. 支持多种输出格式文本、PDF、BRF9. 提供打印模板和预览三、模块化代码实现主程序文件ebook_to_braille_converter.py#!/usr/bin/env python3# -*- coding: utf-8 -*-电子书转盲文转换器将电子书转换为盲文格式生成可打印的盲文文档版本3.0.0作者无障碍智能助手import osimport sysimport reimport jsonimport zipfileimport xml.etree.ElementTree as ETfrom typing import Dict, List, Tuple, Optional, Any, Setfrom dataclasses import dataclass, asdict, fieldfrom enum import Enumimport warningswarnings.filterwarnings(ignore)# 文本处理try:import jiebaimport pypinyinfrom pypinyin import pinyin, StyleJIEBA_AVAILABLE TruePYPINYIN_AVAILABLE Trueexcept ImportError:JIEBA_AVAILABLE FalsePYPINYIN_AVAILABLE Falseprint(警告: 中文处理库未安装)# PDF处理try:import PyPDF2PDF_AVAILABLE Trueexcept ImportError:PDF_AVAILABLE Falseprint(警告: PDF处理库未安装)# 文档生成try:from reportlab.lib.pagesizes import A4, letterfrom reportlab.lib.units import mm, inchfrom reportlab.pdfgen import canvasfrom reportlab.pdfbase import pdfmetricsfrom reportlab.pdfbase.ttfonts import TTFontREPORTLAB_AVAILABLE Trueexcept ImportError:REPORTLAB_AVAILABLE Falseprint(警告: PDF生成库未安装)class BrailleSystem(Enum):盲文体系枚举CHINESE_MANDARIN chinese_mandarin # 中文普通话盲文ENGLISH_UEB english_ueb # 英文统一盲文NUMERIC numeric # 数字盲文MUSIC music # 音乐盲文MATH math # 数学盲文class BrailleCell:盲文单元格6点或8点def __init__(self, dots: List[int] None, is_6dot: bool True):初始化盲文单元格Args:dots: 点的列表如[1,3,5]表示第1、3、5点凸起is_6dot: 是否为6点盲文True为6点False为8点self.is_6dot is_6dotself.max_dots 6 if is_6dot else 8if dots is None:self.dots 
[]else:# 验证并排序点self.dots sorted([d for d in dots if 1 d self.max_dots])def __str__(self) - str:字符串表示if not self.dots:return ⠀ # 空盲文单元格# 转换为Unicode盲文字符return self.to_unicode()def to_unicode(self) - str:转换为Unicode字符if not self.dots:# 空单元格return ⠀ if self.is_6dot else ⣀# 计算Unicode码点# 盲文Unicode范围U2800 - U28FFbase 0x2800if self.is_6dot:# 6点盲文dot_map {1: 0x01, 2: 0x02, 3: 0x04, 4: 0x08, 5: 0x10, 6: 0x20}else:# 8点盲文dot_map {1: 0x01, 2: 0x02, 3: 0x04, 4: 0x08,5: 0x10, 6: 0x20, 7: 0x40, 8: 0x80}code basefor dot in self.dots:code dot_map.get(dot, 0)return chr(code)def to_ascii(self) - str:转换为ASCII表示如134表示1,3,4点凸起if not self.dots:return 0return .join(str(d) for d in self.dots)def to_binary(self) - str:转换为二进制表示binary [0] * self.max_dotsfor dot in self.dots:if 1 dot self.max_dots:binary[dot-1] 1return .join(binary)classmethoddef from_binary(cls, binary_str: str, is_6dot: bool True) - BrailleCell:从二进制字符串创建max_dots 6 if is_6dot else 8if len(binary_str) ! max_dots:raise ValueError(f二进制字符串长度必须为{max_dots})dots []for i, bit in enumerate(binary_str):if bit 1:dots.append(i 1)return cls(dots, is_6dot)classmethoddef from_unicode(cls, char: str) - BrailleCell:从Unicode字符创建code ord(char)if 0x2800 code 0x28FF:# 盲文Unicode字符dot_value code - 0x2800# 判断是6点还是8点is_6dot dot_value 0x3F # 6点盲文范围0x2800-0x283Fif is_6dot:dots []dot_map {0x01: 1, 0x02: 2, 0x04: 3, 0x08: 4, 0x10: 5, 0x20: 6}else:dots []dot_map {0x01: 1, 0x02: 2, 0x04: 3, 0x08: 4,0x10: 5, 0x20: 6, 0x40: 7, 0x80: 8}for value, dot_num in dot_map.items():if dot_value value:dots.append(dot_num)return cls(dots, is_6dot)raise ValueError(f不是有效的盲文Unicode字符: {char})def __eq__(self, other) - bool:相等比较if not isinstance(other, BrailleCell):return Falsereturn self.dots other.dots and self.is_6dot other.is_6dotdataclassclass BrailleCharacter:盲文字符可能由多个单元格组成cells: List[BrailleCell]original_char: strdescription: str def __str__(self) - str:字符串表示return .join(str(cell) for cell in self.cells)def to_ascii(self) - str:转换为ASCII表示return -.join(cell.to_ascii() 
for cell in self.cells)def is_contracted(self) - bool:是否为缩写形式return len(self.cells) 1class BrailleTable:盲文对照表基类def __init__(self, system: BrailleSystem):self.system systemself.table {}self.contractions {} # 缩写表self.init_table()def init_table(self):初始化对照表子类实现passdef get_braille(self, char: str) - Optional[BrailleCharacter]:获取字符的盲文表示return self.table.get(char)def get_contraction(self, word: str) - Optional[BrailleCharacter]:获取单词的缩写形式return self.contractions.get(word.lower())class ChineseBrailleTable(BrailleTable):中文盲文对照表def __init__(self):super().__init__(BrailleSystem.CHINESE_MANDARIN)# 加载配置文件self.load_config()def load_config(self):加载配置文件config_path config/chinese_braille.jsondefault_config self.get_default_config()try:if os.path.exists(config_path):with open(config_path, r, encodingutf-8) as f:config json.load(f)else:config default_config# 保存默认配置os.makedirs(os.path.dirname(config_path), exist_okTrue)with open(config_path, w, encodingutf-8) as f:json.dump(config, f, indent2, ensure_asciiFalse)self.load_from_config(config)except Exception as e:print(f加载配置文件失败使用默认配置: {e})self.load_from_config(default_config)def get_default_config(self) - Dict:获取默认配置return {system: chinese_mandarin,description: 中文普通话盲文现行盲文,tone_marks: True,use_contractions: True,characters: {# 声母b: [[1]],p: [[1,2,3,4]],m: [[1,3,4]],f: [[1,2,4]],d: [[1,4,5]],t: [[2,3,4,5]],n: [[1,3,4,5]],l: [[1,2,3]],g: [[1,2,4,5]],k: [[1,3]],h: [[1,2,5]],j: [[2,4,5]],q: [[1,2,3,4,5]],x: [[1,3,4,5]],zh: [[1,3,5,6]],ch: [[1,6]],sh: [[1,4,6]],r: [[2,4,5,6]],z: [[1,3,5]],c: [[1,3,4,6]],s: [[2,3,4]],y: [[1,3,4,5,6]],w: [[2,4,5,6]],# 韵母a: [[3,5]],o: [[1,3,5]],e: [[2,6]],i: [[2,4]],u: [[1,3,6]],v: [[1,2,4,5,6]],ai: [[1,6]],ei: [[2,3,4,6]],ao: [[3,5,6]],ou: [[2,3,5,6]],an: [[3,6]],en: [[2,6], [3,6]],ang: [[1,3,5,6]],eng: [[2,6], [2,3,4,6]],er: [[2,3,4,5,6]],i_: [[2,4]], # 单独iia: [[2,4], [3,5]],iao: [[2,4], [3,5,6]],ie: [[2,4], [2,6]],iu: [[2,4], [1,3,6]],ian: [[2,4], [3,6]],in: [[2,4], [2,3,4,6]],iang: [[2,4], [1,3,5,6]],ing: 
[[2,4], [2,6], [2,3,4,6]],u_: [[1,3,6]], # 单独uua: [[1,3,6], [3,5]],uo: [[1,3,6], [1,3,5]],uai: [[1,3,6], [1,6]],ui: [[1,3,6], [2,4]],uan: [[1,3,6], [3,6]],un: [[1,3,6], [2,3,4,6]],uang: [[1,3,6], [1,3,5,6]],ong: [[1,3,6], [2,3,5,6]],v_: [[1,2,4,5,6]], # 单独üve: [[1,2,4,5,6], [2,6]],van: [[1,2,4,5,6], [3,6]],vn: [[1,2,4,5,6], [2,3,4,6]],# 声调1: [[3,4,5]], # 阴平2: [[3,4]], # 阳平3: [[3,5,6]], # 上声4: [[3,4,5,6]], # 去声5: [[3,4,6]], # 轻声# 数字0: [[3,5,6]],1: [[1]],2: [[1,2]],3: [[1,4]],4: [[1,4,5]],5: [[1,5]],6: [[1,2,4]],7: [[1,2,4,5]],8: [[1,2,5]],9: [[2,4]],# 标点符号: [[2]], # 逗号。: [[2,5,6]], # 句号: [[2,3,5]], # 感叹号: [[2,3,6]], # 问号: [[2,3]], # 分号: [[2,5]], # 冒号、: [[2,6]], # 顿号「: [[2,3,5,6]], # 左引号」: [[3,5,6]], # 右引号: [[2,3,5,6]], # 左括号: [[3,5,6]], # 右括号《: [[2,3,5,6]], # 左书名号》: [[3,5,6]], # 右书名号—: [[3,6]], # 破折号…: [[2,3,6]], # 省略号·: [[3]], # 间隔号# 特殊符号 : [[0]], # 空格\n: [], # 换行\t: [[0,0,0,0]], # 制表符},contractions: {的: [[1,4,5,6]], # 的了: [[1,2,3,5,6]], # 了是: [[2,3,4,6]], # 是不: [[1,2]], # 不在: [[1,2,6]], # 在有: [[1,2,4,6]], # 有和: [[1,2,3,4,6]], # 和这: [[1,4,5,6], [3,5]], # 这个: [[1,2,3,4,5]], # 个我: [[2,4,6]], # 我们: [[1,2,3,4,5,6]], # 们}}def load_from_config(self, config: Dict):从配置加载characters config.get(characters, {})contractions config.get(contractions, {})# 加载字符表for char, dots_list in characters.items():cells []for dots in dots_list:if dots 0 or dots [0]:cells.append(BrailleCell([]))else:cells.append(BrailleCell(dots))self.table[char] BrailleCharacter(cells, char)# 加载缩写表for word, dots_list in contractions.items():cells []for dots in dots_list:cells.append(BrailleCell(dots))self.contractions[word] BrailleCharacter(cells, word, f缩写: {word})def get_braille_for_pinyin(self, pinyin_str: str, tone: int 0) - BrailleCharacter:获取拼音的盲文表示Args:pinyin_str: 拼音字符串tone: 声调1-50表示无调Returns:盲文字符# 标准化拼音pinyin_str pinyin_str.lower()# 特殊处理üpinyin_str pinyin_str.replace(ü, v)# 分割声母和韵母initials [b, p, m, f, d, t, n, l, g, k, h,j, q, x, zh, ch, sh, r, z, c, s, y, w]# 查找最长匹配的声母initial remainder pinyin_strfor 
init in sorted(initials, keylen, reverseTrue):if pinyin_str.startswith(init):initial initremainder pinyin_str[len(init):]break# 获取声母盲文cells []if initial and initial in self.table:cells.append(self.table[initial].cells[0])elif initial: # 如果没有单独的声母可能需要特殊处理# 尝试分解pass# 获取韵母盲文if remainder in self.table:cells.append(self.table[remainder].cells[0])elif remainder: # 尝试匹配部分for i in range(len(remainder), 0, -1):part remainder[:i]if part in self.table:cells.append(self.table[part].cells[0])remainder remainder[i:]break# 添加声调if tone 0 and str(tone) in self.table:cells.append(self.table[str(tone)].cells[0])if not cells:# 如果没有找到匹配返回空单元格cells.append(BrailleCell())return BrailleCharacter(cells, pinyin_str, f拼音: {pinyin_str} 声调: {tone})class EnglishBrailleTable(BrailleTable):英文盲文对照表UEBdef __init__(self):super().__init__(BrailleSystem.ENGLISH_UEB)self.init_table()def init_table(self):初始化英文盲文表# 基本字母letters {a: [1],b: [1,2],c: [1,4],d: [1,4,5],e: [1,5],f: [1,2,4],g: [1,2,4,5],h: [1,2,5],i: [2,4],j: [2,4,5],k: [1,3],l: [1,2,3],m: [1,3,4],n: [1,3,4,5],o: [1,3,5],p: [1,2,3,4],q: [1,2,3,4,5],r: [1,2,3,5],s: [2,3,4],t: [2,3,4,5],u: [1,3,6],v: [1,2,3,6],w: [2,4,5,6],x: [1,3,4,6],y: [1,3,4,5,6],z: [1,3,5,6],}for char, dots in letters.items():upper_char char.upper()self.table[char] BrailleCharacter([BrailleCell(dots)], char)self.table[upper_char] BrailleCharacter([BrailleCell([6]), BrailleCell(dots)], upper_char)# 数字前缀self.table[#] BrailleCharacter([BrailleCell([3,4,5,6])], #)# 标点符号punctuation {.: [2,5,6],,: [2],?: [2,3,6],!: [2,3,5],;: [2,3],:: [2,5],: [2,3,5,6],: [3],(: [2,3,5,6],): [3,5,6],[: [2,3,5,6],]: [3,5,6],{: [2,3,5,6],}: [3,5,6],-: [3,6],_: [3,6,3,6], # 下划线}for char, dots in punctuation.items():self.table[char] BrailleCharacter([BrailleCell(dots)], char)# 常用缩写contractions {the: [2,3,4,6],and: [1,2,3,4,6],for: [1,2,3,4,5,6],with: [2,3,4,5,6],ing: [3,4,6],ed: [1,2,4,5,6],sh: [1,4,6],th: [1,4,5,6],wh: [1,5,6],ou: [1,2,5,6],st: [3,4],ar: [3,4,5],er: [1,2,4,5,6],gh: [1,2,6],ow: 
[2,4,6],}for word, dots in contractions.items():self.contractions[word] BrailleCharacter([BrailleCell(dots)], word, f缩写: {word})class TextExtractor:文本提取器支持多种格式def __init__(self, config: Dict):初始化文本提取器Args:config: 提取器配置self.config configdef extract_text(self, filepath: str) - Tuple[str, Dict]:提取文本内容Args:filepath: 文件路径Returns:(文本内容, 元数据)if not os.path.exists(filepath):raise FileNotFoundError(f文件不存在: {filepath})ext os.path.splitext(filepath)[1].lower()if ext .txt:return self.extract_txt(filepath)elif ext .pdf:return self.extract_pdf(filepath)elif ext .epub:return self.extract_epub(filepath)elif ext in [.doc, .docx]:return self.extract_doc(filepath)elif ext in [.html, .htm]:return self.extract_html(filepath)else:raise ValueError(f不支持的格式: {ext})def extract_txt(self, filepath: str) - Tuple[str, Dict]:提取纯文本try:with open(filepath, r, encodingutf-8) as f:content f.read()metadata {format: txt,encoding: utf-8,size: len(content),chars: len(content),lines: content.count(\n) 1}return content, metadataexcept UnicodeDecodeError:# 尝试其他编码encodings [gbk, gb2312, big5, latin-1]for encoding in encodings:try:with open(filepath, r, encodingencoding) as f:content f.read()metadata {format: txt,encoding: encoding,size: len(content),chars: len(content),lines: content.count(\n) 1}return content, metadataexcept:continueraise ValueError(无法解码文本文件)def extract_pdf(self, filepath: str) - Tuple[str, Dict]:提取PDF文本if not PDF_AVAILABLE:raise ImportError(PyPDF2未安装无法处理PDF文件)try:text metadata {}with open(filepath, rb) as f:pdf_reader PyPDF2.PdfReader(f)# 提取元数据if pdf_reader.metadata:metadata {title: pdf_reader.metadata.get(/Title, ),author: pdf_reader.metadata.get(/如果你觉得这个工具好用欢迎关注我

需要专业的网站建设服务?

联系我们获取免费的网站建设咨询和方案报价,让我们帮助您实现业务目标

立即咨询