blob: 9ed9f2d084069dd425108e39e0531ce59bf2a923 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
import string
import re
import os
# Read the whole source document up front: the working directory changes
# below, so the relative input path must be resolved before that happens.
# NOTE(review): the input is decoded with the platform default encoding while
# both outputs are written as UTF-8 -- confirm the encoding of the source file.
with open('task4_data.html') as f:
    data = f.read()

# Not sure why, but the assignment says to do it this way: the results go
# into an "htmls" directory next to the starting directory.
os.chdir('..')
# exist_ok=True lets the script be re-run; a plain os.mkdir() raises
# FileExistsError once the directory has been created by an earlier run.
os.makedirs('htmls', exist_ok=True)
os.chdir('htmls')

# Output 1: the document with every character outside string.printable
# dropped. The set is built once so each membership test is O(1) instead of
# a linear scan over the string.printable string.
printable_chars = frozenset(string.printable)
cleared_data = ''.join(char for char in data if char in printable_chars)
with open('ascii_cleared.html', 'w', encoding='utf-8') as f:
    f.write(cleared_data)

# Output 2: the original (unfiltered) document with newlines removed inside
# every <BODY>...</BODY> section. re.DOTALL makes "." match newlines too, so
# the non-greedy ".*?" spans multi-line bodies without the slower "(.|\n)"
# alternation; text outside the matched sections is left untouched.
body_cleared = re.sub(
    r'<BODY>.*?</BODY>',
    lambda match: match.group(0).replace('\n', ''),
    data,
    flags=re.DOTALL,
)
with open('body_cleared.html', 'w', encoding='utf-8') as f:
    f.write(body_cleared)
|