# path: root/day1/task4/task4.py
# blob: 9ed9f2d084069dd425108e39e0531ce59bf2a923 (plain)
import string
import re
import os

# Characters kept by strip_non_printable(). A frozenset gives O(1) membership
# tests instead of scanning the string.printable str for every character.
PRINTABLE_CHARS = frozenset(string.printable)

# Non-greedy match of a single <BODY>...</BODY> span. re.DOTALL lets "." cross
# newlines, which avoids the slow "(.|\n)" alternation inside a repeated group.
# The tag match is case-sensitive (upper-case BODY only).
BODY_PATTERN = re.compile(r'<BODY>.*?</BODY>', re.DOTALL)


def strip_non_printable(text):
    """Return *text* with every character not in string.printable removed."""
    return ''.join(char for char in text if char in PRINTABLE_CHARS)


def join_body_lines(text):
    """Return *text* with newlines removed inside each <BODY>...</BODY> span.

    Text outside the spans is returned unchanged; if no span is found the
    input comes back as is.
    """
    return BODY_PATTERN.sub(
        lambda match: match.group(0).replace('\n', ''),
        text,
    )


def main():
    """Read task4_data.html and write two processed copies into ../htmls.

    ascii_cleared.html holds the input with non-printable/non-ASCII characters
    dropped; body_cleared.html holds the *original* input with the newlines
    inside <BODY>...</BODY> removed.
    """
    # NOTE(review): the input is assumed to be UTF-8, matching the encoding
    # used for both output files -- confirm against the actual data file.
    with open('task4_data.html', encoding='utf-8') as f:
        data = f.read()

    # Required by the assignment text (the reason is not stated there):
    # the results go into an 'htmls' directory one level above the current one.
    os.chdir('..')
    # exist_ok=True so that re-running the script does not crash with
    # FileExistsError once the directory has been created.
    os.makedirs('htmls', exist_ok=True)
    os.chdir('htmls')

    with open('ascii_cleared.html', 'w', encoding='utf-8') as f:
        f.write(strip_non_printable(data))

    with open('body_cleared.html', 'w', encoding='utf-8') as f:
        f.write(join_body_lines(data))


if __name__ == '__main__':
    main()