数理逻辑及其 python 实现:1
Notes on 「Mathematical logic through python」
一、命题逻辑的语法
本文里处理语法的 python 对象包括 Formula 和 Proof, 分别对应命题和证明,本章定义对 Formula 的各类函数,主要是对字符串进行解析,并且在解析树上做一些递归操作
命题逻辑中的 Formula
用 Formula 类来封装命题逻辑命题字符串,以下是实现对该命题属于哪类字符串的判断函数:
from functools import lru_cache
@lru_cache(maxsize=100) # Cache the return value of is_variable
def is_variable(string: str) -> bool:
return (
string[0] >= "p"
and string[0] <= "z"
and (len(string) == 1 or string[1:].isdecimal())
)
接着还有对常量、唯一的单目运算和三个双目运算判断:
def is_constant(string: str) -> bool:
return string == 'T' or string == 'F'
def is_unary(string: str) -> bool:
return string == '~'
def is_binary(string: str) -> bool:
return string == '&' or string == '|' or string == '->'
除了连接词外,很重要的是括号:
- \( \phi \& \psi \) 是非法的, \( (\phi \& \psi) \) 是合法的(也叫 valid, well-formed)
Formula 类的构造:
from __future__ import annotations
from typing import Mapping, Optional, Set, Tuple, Union
class Formula:
root: str
first: Optional[Formula]
second: Optional[Formula]
def __init__(self, root: str, first: Optional[Formula] = None,
second: Optional[Formula] = None):
if is_variable(root) or is_constant(root):
assert first is None and second is None
self.root = root
elif is_unary(root):
assert first is not None and second is None
self.root, self.first = root, first
else:
assert is_binary(root)
assert first is not None and second is not None
self.root, self.first, self.second = root, first, second
def __eq__(self, other: object) -> bool:
return isinstance(other, Formula) and str(self) == str(other)
def __ne__(self, other: object) -> bool:
return not self == other
以上对命题对象进行了分类:原子(常量或变量)、单目表达式、双目表达式
my_formula = Formula('~', Formula('&', Formula('p'), Formula('q76')))
print(my_formula.root, my_formula.first, my_formula.first.root, my_formula.first.first)
print(my_formula)
~ <__main__.Formula object at 0x7fda31ef7dc0> & <__main__.Formula object at 0x7fda31ef75b0> <__main__.Formula object at 0x7fda31ef7400>
当前是没有实现 Formula 的字符串表征,因此以上打印出对象地址,我们期望的是
str(Formula(string))==string
T1.1 命题字符串解析成语法树
实现表征(repr)函数,以便打印出,这是一个类似树中序遍历递归算法,打印出得到:
def _repr__(self) -> str:
# Task 1.1
if is_variable(self.root) or is_constant(self.root):
return self.root
elif is_unary(self.root):
return self.root + repr(self.first)
return f"({repr(self.first)}{self.root}{repr(self.second)})"
Formula.__repr__ = _repr__
print(my_formula.root, my_formula.first, my_formula.first.root, my_formula.first.first)
print(my_formula)
~ (p&q76) & p ~(p&q76)
T1.2 收集特殊的叶子节点:变量名
def variables(self) -> Set[str]:
# Task 1.2
res = set()
def dfs(formula):
if is_constant(formula.root):
return
if is_variable(formula.root):
res.add(formula.root)
return
if hasattr(formula, "first"):
dfs(formula.first)
if hasattr(formula, "second"):
dfs(formula.second)
dfs(self)
return res
Formula.variables = variables
可以获得所有变量名:
print(my_formula.variables())
{'p', 'q76'}
T1.3 收集非叶子节点和特殊叶子节点
搜集操作符以及常量
def operators(self) -> Set[str]:
# Task 1.3
res = set()
def dfs(formula):
if is_variable(formula.root):
return
else:
res.add(formula.root)
if hasattr(formula, "first"):
dfs(formula.first)
if hasattr(formula, "second"):
dfs(formula.second)
dfs(self)
return res
Formula.operators = operators
可以获得所有变量名和常量:
print(my_formula.operators())
{'&', '~'}
T1.4 _parse_prefix: 部分解析
某个字符串之所以能被确定性地转换成一个 Formula,是因为任何一个字符的功能是不能随着它在字符串里的位置而变化,每次看到某种字符串的时候,就知道了接下来的可能性,然后根据这些可能性继续递归地解析下去,这就是所谓 context free 的意思。 其中最重要是括号,当看到左括号,那么接下来目的就很明确:找到右括号,然后在左右括号之间的子字符串上递归解析,
以下是解析出有效的前缀和剩余后缀的函数,注意这里需要用 staticmethod 修饰,否则 Formula 实例去调用该函数的时候会报错。
@staticmethod
def parse_prefix(string: str) -> Tuple[Union[Formula, None], str]:
# Task 1.4
if string == "":
return None, "Empty Formula Error"
if string[0] != "(":
if is_constant(string[0]): # T or F
return Formula(string[0]), string[1:]
if is_unary(string[0]): # ~
f, remain = Formula._parse_prefix(string[1:])
if f is None:
return None, "Unary Syntax Error"
return Formula("~", f), remain
if not is_variable(string[0]):
return None, "Variable Sytax Error"
# read variable
n = len(string)
v_end = n
for i in range(1, n):
if not string[i].isdecimal():
return Formula(string[:i]), string[i:]
return Formula(string), ""
else:
# get first ()
left_b = 0
for i, ele in enumerate(string):
if ele == "(":
left_b += 1
elif ele == ")":
left_b -= 1
if left_b == 0:
break
f1, remain = Formula._parse_prefix(string[1:i])
if f1 is None:
return None, remain
if not remain or (remain[0] not in ["|", "&", "+"] and
remain[:2] not in ["->", "-&", "-|"] and
remain[:3] not in ["<->"]):
return None, "binary syntax Error"
if remain[0] in ["|", "&", "+"]:
root = remain[0]
f2, remain = Formula._parse_prefix(remain[1:])
if remain != "":
return None, f"Error: {string[:i+1]} is Not a valid formula"
return Formula(root, f1, f2), string[i+1:]
if remain[:1] == "-":
operator = remain[:2]
f2, remain = Formula._parse_prefix(remain[2:])
if remain != "":
return None, f"Error: {string[:i+1]} is Not a valid formula"
return Formula(operator, f1, f2), string[i+1:]
if remain[:1] == "<":
operator = remain[:3]
f2, remain = Formula._parse_prefix(remain[3:])
if remain != "":
return None, f"Error: {string[:i+1]} is Not a valid formula"
return Formula(operator, f1, f2), string[i+1:]
Formula._parse_prefix = parse_prefix
注意以上实现考虑了更多运算符参与的情况,比如 "<->", 这些不影响总体结构。
print(Formula._parse_prefix('p3&q'), Formula._parse_prefix('((p&q))') )
(p3, '&q') (None, 'binary syntax Error')
以上函数之所以能够实现,是因为当前设计的形式系统的公式有以下性质:
No formula is a prefix of another formula, except for the case of a variable name as a prefix of another variable name.
lemma 1.1 (Prefix-Free Property of Formulas)
以上代码中的以下代码片段是对变量名需要贪心解析的约定的实现,因为 q3 和 q321 都是合法的变量名,但 q3 是 q321 的前缀,为了遵循 prefix-Free 原则,不能解析到 q3 就停止,而应该继续向前查下去,直到遇到非数字字符为止。
for i in range(1, n):
if not string[i].isdecimal():
return Formula(string[:i]), string[i:]
T1.5-6 语法检查和完整的语法解析
有了上一节的函数,语法检查变得简单了:
@staticmethod
def is_formula(string: str) -> bool:
# Task 1.5
f, remain = Formula._parse_prefix(string)
if f is None:
return False
# if is_variable(f.root) or is_constant(f.root) or is_unary(f.root):
return remain == ""
Formula.is_formula = is_formula
print(Formula.is_formula("x13->x14"))
print(Formula.is_formula("(x13->x14)"))
False True
同样,完整的解析也变得简单:
@staticmethod
def parse(string: str) -> Formula:
assert Formula.is_formula(string)
# Task 1.6
f, remain = Formula._parse_prefix(string)
return f
Formula.parse = parse
print(Formula.parse("(x13->x14)"))
(x13->x14)
parse 函数的实现证明了以下定理:
There is a unique derivation tree for every valid propositional formula.
theorem 1.1 (Unique Readability of Formulas)
T1.7 Polish Notations: 中序到先序遍历
在 T1.1 中,我们采用的是 infix 格式的表达式,它在遍历语法树的时候遵循的是中序遍历,先打印左子树,然后打印当前操作符,再打印右子树。
只需要把 __repr__
中最后递归语句的顺序从中序遍历变成先序遍历就得到了前缀表达式,被称为 Polish notation.
f"({repr(self.first)}{self.root}{repr(self.second)})"
就得到前缀表达式:
def polish(self) -> str:
# Optional Task 1.7
def dfs(formula):
if is_constant(formula.root) or is_variable(formula.root):
return formula.root
if is_unary(formula.root):
return formula.root + dfs(formula.first)
assert formula.first and formula.second
return formula.root + dfs(formula.first) + dfs(formula.second)
return dfs(self)
Formula.polish = polish
print(my_formula.polish())
~&pq76
这种风格的好处是可以完全省略括号,坏处是不太自然(除非从小教这种写法的数学,不是学 1+1=2, 而是 =+112, 这也许是一种灾难)。
为什么不用后序遍历?当然也是可以的,不过由于大部分人的阅读习惯是从左往右,改成后序就需要从右向左阅读,更不习惯。
添加上括号就是 lisp 风格:
def lispy(self) -> str:
def dfs(formula):
if is_constant(formula.root) or is_variable(formula.root):
return formula.root
if is_unary(formula.root):
return f"({formula.root} {dfs(formula.first)})"
assert formula.first and formula.second
return f"({formula.root} {dfs(formula.first)} {dfs(formula.second)})"
return dfs(self)
Formula.lispy = lispy
print(my_formula.lispy())
(~ (& p q76))
T1.8: 前缀表达式的反序列化
T1.8: prefix 形式字符串的反序列化
@staticmethod
def parse_polish(string: str) -> Formula:
# Optional Task 1.8
f, remain = Formula._parse_polish_prefix(string)
return f
@staticmethod
def _parse_polish_prefix(string: str) -> Tuple[Union[Formula, None], str]:
if string == "":
return None, "Empty Formula Error"
if is_constant(string[0]): # T or F
return Formula(string[0]), string[1:]
if is_unary(string[0]): # ~
f, remain = Formula._parse_polish_prefix(string[1:])
if f is None:
return None, "Unary Syntax Error"
return Formula("~", f), remain
if is_variable(string[0]):
# read variable
n = len(string)
v_end = n
for i in range(1, n):
if not string[i].isdecimal():
return Formula(string[:i]), string[i:]
return Formula(string), ""
if string[0] in ["|", "&"]:
f1, remain = Formula._parse_polish_prefix(string[1:])
if f1 is None:
return None, remain
root = string[0]
f2, remain = Formula._parse_polish_prefix(remain)
# different from parser_prefix
# if remain != "":
# return None, f"Error: {string} is Not a valid formula"
return Formula(root, f1, f2), remain
if string[:2] == "->":
f1, remain = Formula._parse_polish_prefix(string[2:])
if f1 is None:
return None, remain
root = "->"
f2, remain = Formula._parse_polish_prefix(remain)
# if remain != "":
# return None, f"Error: {string} is Not a valid formula"
return Formula(root, f1, f2), remain
return None, "Error"
Formula._parse_polish_prefix = _parse_polish_prefix
Formula.parse_polish = parse_polish
print(Formula.parse_polish('~&pq76'))
print(Formula.parse_polish('~&pq76').polish())
~(p&q76) ~&pq76