diff options
| author | Andrew <saintruler@gmail.com> | 2019-03-04 20:03:49 +0400 |
|---|---|---|
| committer | Andrew <saintruler@gmail.com> | 2019-03-04 20:03:49 +0400 |
| commit | ddd5bfe49f1b9c3dfb3b1b448bece9fc636cb88f (patch) | |
| tree | f048b71cc9839c2b275ffee652ad91d24edb2ca7 /day1/task4/task4.py | |
| parent | cf734a68aeba34576d9bf5430aeaa75224010c67 (diff) | |
Day 1 Task 4
Diffstat (limited to 'day1/task4/task4.py')
| -rw-r--r-- | day1/task4/task4.py | 25 |
1 files changed, 25 insertions, 0 deletions
"""Day 1, task 4: write two cleaned-up copies of ``task4_data.html``.

This is ``day1/task4/task4.py`` (added as a new file, blob 9ed9f2d).

The script reads ``task4_data.html`` from the current directory, then moves
to the parent directory, makes sure an ``htmls`` directory exists there and
writes two files into it:

* ``ascii_cleared.html`` -- the input with every character that is not in
  ``string.printable`` removed;
* ``body_cleared.html`` -- the input with the newlines inside every
  ``<BODY>...</BODY>`` section removed (the rest of the text is untouched).
"""
import string
import re
import os

# Set lookup instead of scanning the 100-character string for every char.
_PRINTABLE = frozenset(string.printable)

# DOTALL lets '.' cross line breaks, so a body spanning several lines is
# matched without a per-character alternation group. The match is
# case-sensitive and expects a bare <BODY> tag without attributes.
_BODY_RE = re.compile(r'<BODY>.*?</BODY>', re.DOTALL)


def strip_non_printable(text):
    """Return *text* without the characters absent from ``string.printable``."""
    return ''.join(char for char in text if char in _PRINTABLE)


def join_body_lines(text):
    """Return *text* with '\\n' removed inside every ``<BODY>...</BODY>``."""
    return _BODY_RE.sub(
        lambda match: match.group(0).replace('\n', ''),
        text,
    )


def main():
    """Read the input page and write both cleaned variants to ``../htmls``."""
    # NOTE(review): the input is decoded with the locale's default encoding
    # while both outputs are written as UTF-8 -- confirm the encoding of
    # task4_data.html and pass it explicitly.
    with open('task4_data.html') as f:
        data = f.read()

    # The author does not know why, but the assignment asks for this
    # directory shuffle. exist_ok keeps a second run from failing.
    os.chdir('..')
    os.makedirs('htmls', exist_ok=True)
    os.chdir('htmls')

    with open('ascii_cleared.html', 'w', encoding='utf-8') as f:
        f.write(strip_non_printable(data))

    # Works on the raw input, not on the ASCII-cleared text.
    with open('body_cleared.html', 'w', encoding='utf-8') as f:
        f.write(join_body_lines(data))


if __name__ == '__main__':
    main()