import re
# Patterns are compiled once at import time; raw strings keep the regex
# escapes intact and avoid invalid-escape warnings on newer Pythons.
_HTML_TAG_RE = re.compile(r'<[^>]+>')
# ASCII ends at 0x7F; anything above that is treated as non-ASCII.
_NON_ASCII_RE = re.compile(r'[^\x00-\x7f]+')
# Match only the URL token itself (up to the next whitespace) so that the
# words following a URL on the same line are preserved.
_URL_RE = re.compile(r'https?://\S+')
_SPECIAL_CHARS_RE = re.compile(r'[?!+%{}:;.,"\'()\[\]_]')
_WHITESPACE_RE = re.compile(r'\s+')


def rm_tags(text: str) -> str:
    """Strip markup and noise from *text*.

    The cleaning steps run in this order:

    1. Remove HTML tags (anything of the form ``<...>``).
    2. Replace each run of non-ASCII characters with a single space.
    3. Replace each ``http://`` / ``https://`` URL with a single space.
    4. Delete the punctuation characters ``?!+%{}:;.,"'()[]_``.
    5. Collapse every run of whitespace into a single space.

    Leading and trailing whitespace is collapsed but not stripped.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string.
    """
    text = _HTML_TAG_RE.sub('', text)
    text = _NON_ASCII_RE.sub(' ', text)
    text = _URL_RE.sub(' ', text)
    text = _SPECIAL_CHARS_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text)