diff --git "a/input.txt" "b/input.txt" new file mode 100644--- /dev/null +++ "b/input.txt" @@ -0,0 +1,22613 @@ + + +import os +r = [] +lr = [] +l = 0 +for root, dirs, files in os.walk('.'): + for x in root: + if x == '\\': + l += 1 + r.append(root) + lr.append(l) + l = 0 +print('Самым глубоким каталогом является: ') +print(r[lr.index(max(lr))], ' --- ', max(lr), 'уровня') + + +import os +import re +l = 0 +folder = [] +p = re.compile(r"[0-9]+", re.U) +for root, dirs, files in os.walk('.'): + for d in dirs: + if p.search(d): + folder.append(d) + l += 1 +print('Всего найдено {} папок с цифрами в названии.'.format(l)) +print('\nВот они:') +for x in folder: + print(x) +import re +file = input("Какой файл открыть (введите путь к файлу)? ") +with open(file, 'r', encoding = 'utf-8') as f: + i = (len(re.findall(r'\b.+ing\b', f.read()))) + print(i) + + +import re +file = input("Какой файл открыть (введите путь к файлу)? ") +def open_file(text): + with open(file, 'r', encoding = 'utf-8') as f: + result = len(re.findall(r'\b.+ing\b', f.read())) + print(result) + + +import os +import re +l = 0 +folder = [] +p = re.compile(r"[0-9]+", re.U) +for root, dirs, files in os.walk('.'): + for d in dirs: + if p.search(d): + folder.append(d) + l += 1 +print('Всего найдено {} папок с цифрами в названии.'.format(l)) +print('\nВот они:') +for x in folder: + print(x) + + + +import re +file = open('insects.txt', 'r', encoding = 'utf-8') +f = file.read() +delete = re.sub('<.*?>', '', f, flags = re.DOTALL) +change = 'комар((а(ми|х)?)|и|ы|о(в|м)|у|е)?([\s,.!\?:"\(\)\'»])' +Change = 'Комар((а(ми|х)?)|и|ы|о(в|м)|у|е)?([\s,.!\?:"\(\)\'»])' +m = re.sub(change, 'слон\\1\\2', delete) +m = re.sub(Change, 'Слон\\1\\2', m) +with open('elephants.txt', 'w', encoding = 'utf-8') as f: + f.write(m) +a = [] +word = input('enter a word:') +while word: + a.append(word) + word = input('enter a word:') +for w in a: + if len(str(w)) > 5: + print(w) +s = input('enter a word:') +for i in range(len(s)): + 
print(s[:i+1]) + +file = open('text.txt','r',encoding='utf-8') +length1 = 0 +length3 = 0 +for word in file: + if len(word) == 1: + length1 += 1 + elif len(word) == 3: + lenght3 += 1 +if length1 == 0: + print('No words with length of 1 symbol') +elif length3 == 0: + print('No words with length of 3 symbols') +else: + print('In file '+str(length3/length1)+' times more words of length 3 than of words of length 1') +file.close() + +print('Введите три числа:') +a = int(input()) +b = int(input()) +c = int(input()) +s = 0 +if a + b == c: + print('Число',c,'равно сумме первых двух чисел.') +else: + print('Число',c,'не равно сумме первых двух чисел.') +if a * c + b == 0: + x = c + print('Число',c,'является решением линейного уравнения a * x + b = 0, где a - первое число, b - второе число.') +else: + print('Число',c,'не является решением линейного уравнения a * x + b = 0, где a - первое число, b - второе число.') +import re +with open('text.txt', 'r', encoding='utf-8') as f: + exclude = '' + f.readline() + i = 0 + for line in f: + if exclude in line: + break + else: + i+=1 + with open('numbers_of_lines.txt', 'w', encoding = 'utf-8') as m: + m.write(str(i)) + + + +import re +with open ('text.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + result = re.finditer('откр(ы((т(ый|ь))|л(а|о|и)?|в(ший?)?)|о(й(те|ся)?|ют?|е(шь|те?|м)))', text) + for match in result: + print(match.group()) + +import random +def noun(syllables): + file = open('nouns_' + syllables + '.txt','r', encoding = 'utf-8') + text = file.read() + verbs = text.split('\n') + return random.choice(verbs) +def verb(syllables): + file = open('verbs_' + syllables + '.txt','r', encoding = 'utf-8-') + text = file.read() + verbs = text.split('\n') + return random.choice(verbs) +def punctuation(): + marks = ['.', ',', '?', '!', '', '-', '...', '?!'] + return random.choice(marks) +def stroka_5(): + return noun('2') + ' ' + verb('3') + punctuation() +def stroka_7(): + return noun('4') + ' ' + verb('3') + punctuation() 
+def create_poem(): + return stroka_5() + '\n' + stroka_7() + '\n' + stroka_5() +print(create_poem()) + + +import csv +def lets_play(): + words = {} + with open('words.csv', 'r', encoding='utf-8') as f: + text = csv.reader(f, delimiter=',') + for row in text: + words[row[0]] = row[1] + n = 0 + keys = list(words.keys()) + while n < len(words): + i = 0 + while i <= len(words): + if i < len(words): + resp = input(keys[n] + ' ') + if resp == words[keys[n]]: + print('You rock!') + n+=1 + break + else: + print('No. You have ' +str(len(words[keys[n]]))+' more guesses.') + i+=1 + else: + print('Sorry, but you have run out of guesses. The right answer is '+keys[n]+' '+words[keys[n]]) + n+=1 +c = lets_play() +import xml.etree.ElementTree as ET +import glob, os + +f = open('cout_of_sentences', 'w') +for file in glob.glob("*.xhtml"): + print(file) + tree = ET.parse(file) + root = tree.getroot() + i=0 + for word in root.iter('se'): + + i=i+1 + f.write(file+"\t"+str(i)+"\n") + print (i) +f.close() +import xml.etree.ElementTree as ET +import csv +import glob, os +csvfile = open('info.csv', 'w') +with csvfile: + fieldnames = ['Название файла', 'Автор', 'Тематика текста'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames,delimiter=';') + writer.writeheader() + for file in glob.glob("*.xhtml"): + + tree = ET.parse(file) + root = tree.getroot() + author='' + topic='' + for meta in root.iter('meta'): + if (meta.attrib['name']=='author'): + author=meta.attrib['content'] + if (meta.attrib['name']=='topic'): + topic=meta.attrib['content'] + + writer.writerow({'Название файла': file, 'Автор': author,'Тематика текста':topic}) + + +f = open('one.txt', 'r', encoding = 'utf-8') +maks = 1; +mini = 999999; +for line in f: + words = line.split() + if len(words) != 0: + s = len(words) - 1 + for each in words: + s += len(each) + if s > maks: + maks = s + if s < mini: + mini = s +print(maks/mini) +f.close() +w = input('Спрашивает у пользователя слово (в кириллице) ') +t = len(w) - 1 
+print('Выводит на экран нечётные буквы этого слова (но только если это буква "о", буква "п" или буква "е"\n') +while t>=0: + if (t%2==0) and ((w[t]=="о") or (w[t]=="п") or (w[t]=="е")): + print('"', w[t], '" ') + t-=1 + +import random + + +vowels = "АЕЁИОУЫЭЮЯаеёиоуыэюя" +stressed_vowels = "АЕЁИОУЫЭЮЯ" +non_stressed_vowels ="аеёиоуыэюя" +strng = '' +def word_read(): + f = open("stressed_out.txt", 'r', encoding = "utf-8") + f2 = f.read() + f.close() + words = f2.split('\n') + return words +def word_info_vowels(words): + vowel_info = [] + for indx, word in enumerate(words): + vowel_info.append(0) + for letter in word: + if letter in vowels: + vowel_info[indx] += 1 + return vowel_info +def word_info_stresses(words): + stress_info = [] + for indx, word in enumerate(words): + stress_info.append(0) + vow = 0 + for letter in word: + if letter in vowels: + vow += 1 + if letter in stressed_vowels: + stress_info[indx] = vow + return stress_info +def strng_tv(): + t_v = 0 + for letter in strng: + if letter in stressed_vowels: + t_v = 0 + if letter in non_stressed_vowels: + t_v += 1 + return t_v +def strng_tsv(): + t_s_v = 0 + for letter in strng: + if letter in stressed_vowels: + t_s_v += 1 + return t_s_v +def new_word_in_line(words, vowel_info, stress_info): + global strng + t_v = strng_tv() + t_s_v = strng_tsv() + if (t_v == 0) and (t_s_v == 0): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 1) and (vowel_info[n] < 4): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 0) and ((t_s_v == 1) or (t_s_v == 2)): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 3) and (vowel_info[n] < 6): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 0) and (t_s_v == 3): + d = 0 + while d == 0: + n = random.randint(0, 
len(words) - 1) + if (stress_info[n] == 3) and (vowel_info[n] == 5): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 1) and ((t_s_v == 1) or (t_s_v == 2)): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 2) and (vowel_info[n] < 5): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 1) and (t_s_v == 3): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 2) and (vowel_info[n] == 4): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 2) and ((t_s_v == 1) or (t_s_v == 2)): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 1) and (vowel_info[n] < 4): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 2) and (t_s_v == 3): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 1) and (vowel_info[n] == 3): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " +def create_line(words, vowel_info, stress_info): + global strng + strng = "" + for n in range(4): + new_word_in_line(words, vowel_info, stress_info) + new_word_in_line(words, vowel_info, stress_info) +def create_txt(words, vowel_info, stress_info): + global strng + for n in range(4): + create_line(words, vowel_info, stress_info) + print(strng.lower().capitalize()) +def main(): + words = word_read() + vowel_info = word_info_vowels(words) + stress_info = word_info_stresses(words) + create_txt(words, vowel_info, stress_info) +main() + + + + + + + +symbols = '\'"/.,<>:;[]{}\\|1234567890`~!@ +capital_letters = 
'QWERTYUIOPASDFGHJKLZXCVBNMЙЦУКЕНГШЩЗХЪЁФЫВАПРОЛДЖЭЯЧСМИТЬБЮ' +template = 'Предложение № {0}, слово с заглавной буквы № {1} - это "{2}".\n' +def open_file(): + f = open("oister.txt", 'r', encoding = "utf-8") + st = f.read() + f.close() + return st +def clean(st): + arr_dirty = st.replace('.,!?…', '.').split('.') + arr_cleaner = [sentence.replace(symbols, '') for sentence in arr_dirty] + arr_less_10 = [sentence for sentence in arr_cleaner if len(sentence.split()) >= 10] + arr = [sentence.split() for sentence in arr_cleaner] + return arr +def find_capitals(arr): + for indx, sentence in enumerate(arr): + i = 0 + for word in sentence: + if word[0] in capital_letters: + i += 1 + print(template.format(indx + 1, i, word)) +def main(): + st = open_file() + arr = clean(st) + find_capitals(arr) +main() + +import random +def open_file(): + f = open("words_with_explanations.csv", 'r', encoding = "utf-8") + arr = f.readlines() + f.close() + return arr +def mapish(arr): + mp = {} + for line in arr: + words = line.split(";") + mp[words[0]] = words[1].replace('\n', '') + return mp +def game(mp): + answ = input('Хотите сыграть - введите "да" ') + while answ == 'да': + word = random.choice(list(mp.keys())) + st = "" + for letter in mp[word]: + st += "*" + print(word, " ", st) + t = False + while t == False: + guess = input("Ваша версия ") + if guess == mp[word]: + t = True + print("Да!!! 
Ура!!!") + else: + print("Нет(") + answ = input('Хотите сыграть - введите "да" ') +def main(): + arr = open_file() + mp = mapish(arr) + game(mp) +main() + + +import os +import re +def open_file(path): + fi = open(path, 'r', encoding = "cp1251") + st = fi.read() + fi.close() + return st +def find_auth(raw): + auth_arr = re.search('([а-яёА-ЯЁa-zA-Z- ]*)\..*', raw).group(1) + auth = '' + for l in auth_arr: + auth += l + return auth +def find_date(raw): + date_arr = re.search('.*, ([0-9.]*)', raw).group(1) + date = '' + for l in date_arr: + date += l + return date +def all_in_all(): + auth = [] + date = [] + file_all = [] + path = 'news' + for root, dirs, files in os.walk(path): + for f in files: + file_all.append(f) + raw = open_file(os.path.join(root, f)) + auth.append(find_auth(raw)) + date.append(find_date(raw)) + return auth, date, file_all +def wrt(stri): + f = open("result.csv", 'w', encoding = "utf-8") + f.write(stri) + f.close() +def main(): + auth, date, file_all = all_in_all() + stri = '' + for idx, file in enumerate(file_all): + stri += file + stri += ',' + stri += auth[idx] + stri += ',' + stri += date[idx] + stri += '\n' + wrt(stri) +main() + + + + +punct = '\'"/.,<>:;[]{}\\|1234567890`~!@ +import os +import re +def open_file(path): + fi = open(path, 'r', encoding = "cp1251") + st = fi.read() + fi.close() + return st +def norm_txt(st): + st_clean = st.replace('', '') + st_clean = st_clean.replace('', '\n') + st_clean = st_clean.replace('', '') + st_clean = st_clean.replace('\n', '') + st_clean = st_clean.replace('', '') + st_clean = st_clean.replace('', ' ') + st_clean = re.sub('', '', st_clean) + st_clean = re.sub('<.*>\n', '', st_clean) + st_clean = st_clean.replace(' ', ' ') + return st_clean +def count_words(f): + num = 0 + st = open_file(f) + st_clean = norm_txt(st) + arr_word = [] + for word in st_clean.split(): + if word.strip(punct) != '': + arr_word.append(word.strip(punct)) + num = len(arr_word) + return num +def all_in_all(): + arr = [] + 
file_all = [] + path = 'news' + for root, dirs, files in os.walk(path): + for f in files: + arr.append(count_words(os.path.join(root, f))) + file_all.append(f) + return arr, file_all +def wrt(stri): + f = open("result.txt", 'w', encoding = "utf-8") + f.write(stri) + f.close() +def main(): + arr, file_all = all_in_all() + stri = '' + for idx, file in enumerate(file_all): + stri += file + stri += '\t' + stri += str(arr[idx]) + stri += '\n' + wrt(stri) +main() + + + +import os +cyrillic_symbols = '\'"/.,<>:;[]{}\\|1234567890` ~!@ +def go_around(): + q = 0 + for root, dirs, files in os.walk('.'): + for d in dirs: + T = 0 + for letter in d: + if letter not in cyrillic_symbols: + T += 1 + if T == 0: + q += 1 + print(q) +def main(): + go_around() +main() + + +f = open('one.txt', 'r', encoding = 'utf-8') +maks = 1; +mini = 999999; +for line in f: + words = line.split() + if len(words) != 0: + s = len(words) - 1 + for each in words: + s += len(each) + if s > maks: + maks = s + if s < mini: + mini = s +print(maks/mini) +f.close() +from random import randint +def if_a_needed_num_or_not_that_much(x): + tr = False + for num in range (1, 101): + if x == str(num): + tr = True + return tr +t = False +while 4 < 5: + if t == False: + a = randint(0, 100) + 1 + t = True + print('\n\n\nI\'ve chosen a number from 1 to 100, could you guess it? ') + s = input('\nPut a number in here, \nTo stop the game press enter, to restart enter \'I give up\'\n') + if s == '': + break + elif s == 'I give up' or s == 'i give up' or s == 'give up' or s == 'GO FUCK YOURSELF WITH SUCH A NUMBER' or s == '\'I give up\'': + print('My number was ', a) + t = False + continue + elif if_a_needed_num_or_not_that_much(s) == False: + print('OOO!!! I\'m afraid that something has gone terribly wrong! Excuse me...') + continue + i = int(s) + if a == i: + t = False + print('WELL DONE!!! ') + elif a > i: + print('My number is bigger. Try again \n') + else: + print('My number is smaller. 
Try again \n ')from random import randint +def if_a_needed_num_or_not_that_much(x): + tr = False + for num in range (1, 101): + if x == str(num): + tr = True + return tr +t = False +while 4 < 5: + if t == False: + a = randint(0, 100) + 1 + t = True + print('\n\n\nI\'ve chosen a number from 1 to 100, could you guess it? ') + s = input('\nPut a number in here, \nTo stop the game press enter, to restart enter \'I give up\'\n') + if s == '': + break + elif s == 'I give up' or s == 'i give up' or s == 'give up' or s == 'GO FUCK YOURSELF WITH SUCH A NUMBER' or s == '\'I give up\'': + print('My number was ', a) + t = False + continue + elif if_a_needed_num_or_not_that_much(s) == False: + print('OOO!!! I\'m afraid that something has gone terribly wrong! Excuse me...') + continue + i = int(s) + if a == i: + t = False + print('WELL DONE!!! ') + elif a > i: + print('My number is bigger. Try again \n') + else: + print('My number is smaller. Try again \n ') + +def open_file(): + f = open("Austen Jane. 
Pride and Prejudice.txt", 'r', encoding = "utf-8") + text = f.read() + f.close() + return text +def clean(st): + arr = st.split() + for idx, word in enumerate(arr): + arr[idx] = word.strip('\'"/.,<>:;[]{}\\|1234567890`~!@ + arr[idx] = arr[idx].lower() + return arr +def find_ness(arr): + new_arr = [] + how_many = [] + for word in arr: + if word.endswith("ness") == True: + if word in new_arr: + for idx, each in enumerate(new_arr): + if each == word: + how_many[idx] += 1 + else: + new_arr.append(word) + how_many.append(1) + st = "" + for word in new_arr: + st += word + st += " " + return st, new_arr, how_many +def find_max(arr, freq_arr): + t = 0 + for number in freq_arr: + if number > t: + t = number + st = '' + for idx, each in enumerate(arr): + if freq_arr[idx] == t: + st += each + st += " " + return st +def main(): + text = open_file() + arr = clean(text) + st1, new_arr, how_many = find_ness(arr) + st2 = find_max(new_arr, how_many) + print(st1) + print(st2) +main() + +import random + + +vowels = "АЕЁИОУЫЭЮЯаеёиоуыэюя" +stressed_vowels = "АЕЁИОУЫЭЮЯ" +non_stressed_vowels ="аеёиоуыэюя" +strng = '' +def word_read(): + f = open("stressed_out.txt", 'r', encoding = "utf-8") + f2 = f.read() + f.close() + words = f2.split('\n') + return words +def word_info_vowels(words): + vowel_info = [] + for indx, word in enumerate(words): + vowel_info.append(0) + for letter in word: + if letter in vowels: + vowel_info[indx] += 1 + return vowel_info +def word_info_stresses(words): + stress_info = [] + for indx, word in enumerate(words): + stress_info.append(0) + vow = 0 + for letter in word: + if letter in vowels: + vow += 1 + if letter in stressed_vowels: + stress_info[indx] = vow + return stress_info +def strng_tv(): + t_v = 0 + for letter in strng: + if letter in stressed_vowels: + t_v = 0 + if letter in non_stressed_vowels: + t_v += 1 + return t_v +def strng_tsv(): + t_s_v = 0 + for letter in strng: + if letter in stressed_vowels: + t_s_v += 1 + return t_s_v +def 
new_word_in_line(words, vowel_info, stress_info): + global strng + t_v = strng_tv() + t_s_v = strng_tsv() + if (t_v == 0) and (t_s_v == 0): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 1) and (vowel_info[n] < 4): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 0) and ((t_s_v == 1) or (t_s_v == 2)): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 3) and (vowel_info[n] < 6): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 0) and (t_s_v == 3): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 3) and (vowel_info[n] == 5): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 1) and ((t_s_v == 1) or (t_s_v == 2)): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 2) and (vowel_info[n] < 5): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 1) and (t_s_v == 3): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 2) and (vowel_info[n] == 4): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 2) and ((t_s_v == 1) or (t_s_v == 2)): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 1) and (vowel_info[n] < 4): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " + if (t_v == 2) and (t_s_v == 3): + d = 0 + while d == 0: + n = random.randint(0, len(words) - 1) + if (stress_info[n] == 1) and (vowel_info[n] 
== 3): + strng += words[n] + strng += " " + d += 1 + if (stress_info[n] == 0) and (vowel_info[n] == 0): + strng += words[n] + strng += " " +def create_line(words, vowel_info, stress_info): + global strng + strng = "" + for n in range(4): + new_word_in_line(words, vowel_info, stress_info) + new_word_in_line(words, vowel_info, stress_info) +def create_txt(words, vowel_info, stress_info): + global strng + for n in range(4): + create_line(words, vowel_info, stress_info) + print(strng.lower().capitalize()) +def main(): + words = word_read() + vowel_info = word_info_vowels(words) + stress_info = word_info_stresses(words) + create_txt(words, vowel_info, stress_info) +main() +word = input('введите что ли слово ') +string = "" +for letter in word: + string += letter + print(string) + +import re +def open_file(): + f = open("isl.xml", 'r', encoding = "utf-8") + st = f.read() + f.close() + return st +def count_lines(st): + n = 0 + for each in st: + if (each == "\n"): + n += 1 + return n + 1 +def wrt(n): + f = open('new.txt', 'w') + f.write(str(n) + '\n') + f.close() +def main(): + st = open_file() + n = count_lines(st) + wrt(n) +main() + +import re +def open_file(): + f = open("isl.xml", 'r', encoding = "utf-8") + st = f.read() + f.close() + return st + +def create_dic(st): + dic = {} + reg = '()(.*?)()' + m = re.findall(reg, st) + for exp in m: + if exp[3] in dic: + dic[exp[3]] += 1 + else: + dic[exp[3]] = 1 + return dic +def wrt(dic): + f = open('new.txt', 'w') + for each in dic: + f.write(each) + f.close() +def main(): + st = open_file() + dic = create_dic(st) + wrt(dic) +main() + + + + + +import re +def open_file(): + f = open("isl.xml", 'r', encoding = "utf-8") + st = f.read() + f.close() + return st + +def create_dic(st): + dic = {} + reg = '()(.*?)()' + m = re.findall(reg, st) + for exp in m: + if exp[3] in dic: + dic[exp[3]] += 1 + else: + dic[exp[3]] = 1 + return dic +def wrt(dic): + f = open('new.txt', 'w') + for each in dic: + f.write(each) + f.close() +def main(): 
+ st = open_file() + dic = create_dic(st) + wrt(dic) +main() + +import re +def open_file(): + f = open("SPB.html", 'r', encoding = "utf-8") + st = f.read() + f.close() + return st +def find_names(st): + exp = '(">)(UTC)([0-9+-]*?)()' + arr = re.findall(exp, st) + return arr +def clean_res(arr): + new_arr = [] + for each in arr: + res = each[1] + each[2] + if res not in new_arr: + new_arr.append(res) + return new_arr +def wrt(arr): + f = open('new.txt', 'w') + for time in arr: + f.write(time + '\n') + f.close() +def main(): + st = open_file() + arr = find_names(st) + new_arr = clean_res(arr) + wrt(new_arr) +main() + + + +import os +numbers = '1234567890' +def list_of_folders(): + files = {} + for f in os.listdir(): + if f in files: + files[f] += 1 + else: + files[f] = 1 + return files +def no_numbers(files): + clean_files = {} + for f in files: + t = 0 + for number in numbers: + if number in f: + t += 1 + if f in clean_files and t == 0: + clean_files[f] += 1 + elif f not in clean_files and t == 0: + clean_files[f] = 1 + return clean_files +def print_out(clean_files, files): + n = 0 + for f in clean_files: + n += 1 + print(n) + for fi in files: + print(fi) +def main(): + files = list_of_folders() + clean_files = no_numbers(files) + print_out(clean_files, files) +main() + +import re +def open_file(): + f = open("oister.txt", 'r', encoding = "utf-8") + st = f.read() + f.close() + return st +def clean(st): + arr_dirty = st.split() + for idx, word in enumerate(arr_dirty): + arr_dirty[idx] = word.strip('\'"/.,<>:;[]{}\\|1234567890`~!@ + arr = [] + for each in arr_dirty: + if (each != ""): + st2 = " " + each + " " + arr.append(st2) + return arr +def make_st(arr): + st = '' + for word in arr: + if word != "": + st += word + st += " " + return st +def find_find(arr): + exp = ' на(й|ш)(т|д|е|ё|л)(и|у|е|ё|л|д|я|а|о)(т|ш|м|н)?(е|ь|н|а|о|ы|ий|ие|ая|ее)?(ый|ая|ое|ые)?(ся|сь)? 
' + for word in arr: + if re.search(exp, word): + print(word) +def main(): + st = open_file() + arr = clean(st) + find_find(arr) +main() +import os +import shutil +def remove(folder): + for root, dirs, files in os.walk(folder): + for f in files: + os.remove(f) + for d in dirs: + os.rmdir(d) + os.rmdir(folder) +remove('C:\\Users\\student\\Desktop\\papka\\pp') +нужно как-тр делать через os.path.join +Хождение по папкам +for root, dirs, files in os.walk('.'): + print(root, dirs) +путь к файлу чтобы его открыть +os.path.join(root, fname) +file_tree = os.walk('.') +for d in file_tree: + print(d) +for root, dirs, files in os.walk('.'): + print(root) - идем в по папкам, если в одной из папок есть еще папки - сначала идем вглубь, затем к следующим папкам +можно идти снизу вверх +for root, dirs, files in os.walk('.', topdown=False): + print(root) +смотрим файлы +кортеж(tuple) работает почти как массив, но +- пишется в круглых скобках: + t = (1,2,3) +- его нельзя изменять после создания: + t[1] = 4 - так нельзя +кортеж можно использовать в кач-ве ключа словаря: + dic[(1,2,3)] = r'питон' - кладем в словарь элементы и даем значение питон + dic[[1,2,3]] = r'питон' - так нельзя + потому что кортеж hashable, а массив нет +можно сделать частотный словарь +dic(r'мой', r'V'] = 20 +dic(r'мой', r'RPO'] = 5 +кортеж может возвращать любая функция +a, b, c = func() +a = func() +print(a) >>> 1,2,3 +print(a[1]) >>> 2 +Замена +m = re.sub(r'[^;]+', r'', s) +Все НЕ точки с запятой меняем на пустоту в строке s +в том что меняем нельзя использовать регулярные выражение +re.search - возвращает объект типа match +re.findall - возвращает массив кортежей +re.sub - возвращает просто строку +m = re.sub(r'\bкоше?к', r'собак', s) +text = re.sub(r'\bКоше?к', r'Cобак', m) - второй раз меняем уже не в первой строке, а в новой, измененной ранее!!! 
+r'\w+' значит что в строке не экранируемые символы +замена повторяющихся слов +m = re.sub(r'\\w+) \\1', r'\\1', s) +что что заменит на что +не нервничайте - будет работать плохо +флаги немного меняют поведение регулярных выражений +re.DOTALL: + . значит все кроме переноса строки + иногда хотим чтобы она включала /n, нам нужно написать flags = re.DOTALL + все точки в регулярном выражении начнут значить любой знак и перенос строки +удвоение всех слов: + m = re.sub(r'\\w+', u'\\1 \\1', s) +контекстное удаление: + m = re.sub(r'([иео]).([иео])', +[\w0-9] любая не буква или цифры от 0 до 9 +убрать все html теги из документа + m = re.sub(r'<.*?>', r'', s, flags = ) + Привет станет Привет +import re +def opentext(fname): + with open (fname, 'r', encoding = 'utf-8') as t: + text = t.read() + return text +def deltag(fname): + text = opentext(fname) + m = re.sub(r'<.*?>', r' ', text) + text = re.sub(r'\s+', r' ', m) + return text +def byeSteve(fname): + text = deltag(fname) + m = re.sub(r'Стив Джобс', r'Сабрина Маленькая Ведьма', text) + print(m) +def syllable(fname): + opentext() +def align_right(): + arr = ['Kate', 'potato', 'Sasha', 'Okun', 'Валерка'] + for i in arr: + print('{:>10}'.format(i)) +align_right() + +a = [1,2,3,4,5,6,7,8,9] +b = [] +for i in a: + b.append(i**2) +МОжем сделать то же самое в одну строчку +new_b = [i**2 for i in a] +words = ['Mary', 'John', 'Jack', 'Tim', 'Kate', 'Tom', 'Moses', 'Jesus'] +new_words = [w.upper() for w in words] +на выходе - MARY JOHN JAKE TIM... 
+b = [] +for i in a: + if i < 10 and i%2 == 0: + b.append(i**2) +new_b = [i**2 for i in a if i < 10 and i%2 == 0] +other_words = [w.upper() for w in words if re.search('[aAjJ]', w)] + +вместо массивов собираем словари +d = {"корова": "му", "собака": "гав", "кот": "мяу", "свинюга": "хрю"} +sounds = {d[key]: key for key in d} +sounds = {d[key]: key for key in d if len(key) > 4} + +big = [a, new_b, words] +flat = [] +for arr in big: + for item in arr: + flat.append(item) +или: +flat = [item for arr in big for item in arr] + + +s = 'Hello, world! That\'s all folks!' +s.upper() +s.lower() +s.capitalize() +s.title() +template = 'Hello, {}!' +template.format('John') +name = 'Mary' +template.format(name) +template.format(input('Введите имя!!!!!!!12!1111: ')) +template = 'Привет, {1} {0}! Вы, {0}, наш самый ценный клиент. ТОлько вам, {0} {1}, и только сегодня мы предлагаем шкурку от бананов!!!'.format('ПЕтя', 'Иванов') +arr = [21, 45, 100, 4, 5, 6, 6, 99] +template = 'Возраст: {:>10}' стрелочки - это выравнивание. < справа, > слева, ^ посередине. число - минимальное окно, в котором текст. +через двоеточие вводим изменения в форматировании +for i in arr: + print(template.format(i)) +'{:+>10}'.format('text') +при выравнивании текст заполнится плюсиками вместо пробелов +pi = 3.14159265358979323 +'Ваше число {:.2f}'.format(pi) +f означает что число дробное и берем два знака после запятой, а не просто два знака +'{:+>10}'.format('эйяфладлайяокудль') +import re +def opentext(fname): + with open (fname, 'r', encoding = 'utf-8') as t: + text = t.read() + return text +def finddata(fname): + text = opentext(fname) + reg = '' + if re.search(reg, text): + card = re.search(reg, text).group() + profreg = 'Преподаватели(.|\n)*?

(.+?)<' + if re.search(profreg, card): + number = re.search(profreg, card).group(2) + with open ('data.txt', 'w', encoding = 'utf-8') as t: + t.write(number) + else: + print('Информации о преподавателях нет') + with open ('data.txt', 'w', encoding = 'utf-8') as t: + t.write('Информации о преподавателях нет') + else: + print('В данной статье нет инфобокса') + with open ('data.txt', 'w', encoding = 'utf-8') as t: + t.write('В данной статье нет инфобокса') +def main(): + text = input('Введите название файла: ') + finddata(text) +if __name__ == '__main__': + main() + +import os +import shutil +import re +def findext(): + d = {} + ext = '(.*\.)(.*)' + for root, dirs, files in os.walk('.'): + for f in files: + if re.search(ext, f).group(2) not in d: + d[re.search(ext, f).group(2)]= '1' + else: + d[re.search(ext, f).group(2)] = str(int(d[re.search(ext, f).group(2)]) + 1) + return d +def findmax(): + d = findext() + k = 0 + extm = '' + for key in d: + if int(d[key]) > k: + k = int(d[key]) + extm = key + elif int(d[key]) == k: + extm = extm + ', ' + key + print('В текущей папке и в папках, лежащих в ней, наиболее часто встречаются файлы с расширениями: ' + extm + '. 
Они встречаются ' + str(k) + ' раз.') +def main(): + findmax() +if __name__ == '__main__': + main() + +word = input('Введите слово ') +array = [] +while word: + array.append(word) + word = input('Введите слово ') +for i in range(len(array)): + newword = array[i] + newword = newword[i+1 : ] + print(newword) +def opentext(text): + words = [] + with open (text, 'r', encoding = 'utf-8') as t: + newtext = t.read() + newtext = newtext.lower() + words = newtext.split() + for i in range(len(words)): + words[i] = words[i].strip('”“".,!?') + return words +def numbhood(text): + words = opentext(text) + hood = [] + for i in range(len(words)): + if len(words[i])>4: + if words[i].endswith('hood'): + if words[i] not in hood: + hood.append(words[i]) + return hood +def frequency(text, word): + words = opentext(text) + freq = 0 + for i in range(len(words)): + if words[i] == word: + freq += 1 + return freq +text = input('Введите имя файла с английским текстом: ') +hood = numbhood(text) +print('В тексте нашлось', len(hood), 'существительных с суффиксом -hood.') +hfreq = [] +for i in range(len(hood)): + hfreq.append(frequency(text, hood[i])) +minfreq = [] +for i in range(len(hood)): + if hfreq[i] == min(hfreq): + minfreq.append(hood[i]) + forms = [] +for i in range(len(minfreq)): + forms.append(minfreq[i][0:-4]) +', '.join(forms) +', '.join(minfreq) +print('Существительные с суффиксом -hood, имеющие наименьшую частотность в тексте: ' + str(minfreq)) +print('Они образованы от слов: ' + str(forms)) + +import re +def opentext(fname): + with open (fname, 'r', encoding = 'utf-8') as t: + text = t.read() + return text +def sublanguage(fname): + text = opentext(fname) + lang = '(язык)(и|а(ми?|х)?|у|о[мв]|е)?(\s|\.| |\?|\'|,|-|"|»|!|\(|\)|;|:)' + Lang = '(Язык)(и|а(ми?|х)?|у|о[мв]|е)?(\s|\.| |\?|\'|,|-|"|»|!|\(|\)|;|:)' + l = re.search(lang, text) + L = re.search(Lang, text) + if re.search(lang, text): + text = re.sub(l.group(1), 'шашлык', text) + if re.search(Lang, text): + text = 
def opentext(text):
    """Read the UTF-8 file at path `text` and split it into sentences.

    Line breaks are replaced by spaces, the text is split on the sentence
    terminators ?!, ..., ?, !, . and the ellipsis character, and quotes,
    dashes, brackets, commas, colons and semicolons are removed from every
    piece.

    Returns:
        list of str. Pieces keep their leading spaces and the list may end
        with an empty string when the text ends with a terminator.
    """
    with open(text, 'r', encoding='utf-8') as t:
        newtext = t.read().replace('\n', ' ')
    # Multi-character terminators must come first in the alternation:
    # with '\?' listed before '\?!' the sequence "?!" was split twice and
    # produced a spurious empty sentence.
    sentences = re.split(r'\?!|\.\.\.|\?|!|\.|…', newtext)
    return [re.sub(r'[”“"–«»:;(),]', '', piece) for piece in sentences]
r"\b[п]рограммир(ова(в(ш((ий|ая|ее|его|ему|им|ем|ей|ую|ей)(ся)?|и(сь)?))?|ть|л(ся|[аои](сь)?)?)|у(ю(сь|(т|щ(ий|ая|ее|его|ему|им|ем|ую|ей|))(ся)?)?|е((шь|т|м)(ся)?|те(сь)?)|я(сь)?))\b" +for i in range(len(words)): + if re.search(prog,words[i]) != None: + if words[i] not in forms: + forms.append(words[i]) +print('В тексте встретились такие формы глагола "программировать": ' + ', '.join(forms) + '.') +кортеж(tuple) работает почти как массив, но +- пишется в круглых скобках: + t = (1,2,3) +- его нельзя изменять после создания: + t[1] = 4 - так нельзя +кортеж можно использовать в кач-ве ключа словаря: + dic[(1,2,3)] = r'питон' - кладем в словарь элементы и даем значение питон + dic[[1,2,3]] = r'питон' - так нельзя + потому что кортеж hashable, а массив нет +можно сделать частотный словарь +dic(r'мой', r'V'] = 20 +dic(r'мой', r'RPO'] = 5 +кортеж может возвращать любая функция +a, b, c = func() +a = func() +print(a) >>> 1,2,3 +print(a[1]) >>> 2 +Замена +m = re.sub(r'[^;]+', r'', s) +Все НЕ точки с запятой меняем на пустоту в строке s +в том что меняем нельзя использовать регулярные выражение +re.search - возвращает объект типа match +re.findall - возвращает массив кортежей +re.sub - возвращает просто строку +m = re.sub(r'\bкоше?к', r'собак', s) +text = re.sub(r'\bКоше?к', r'Cобак', m) - второй раз меняем уже не в первой строке, а в новой, измененной ранее!!! +r'\w+' значит что в строке не экранируемые символы +замена повторяющихся слов +m = re.sub(r'\\w+) \\1', r'\\1', s) +что что заменит на что +не нервничайте - будет работать плохо +флаги немного меняют поведение регулярных выражений +re.DOTALL: + . 
def deltag(fname):
    """Return the text obtained from opentext(fname) with markup tags removed.

    Everything between '<' and the nearest following '>' is replaced by a
    space, then every run of whitespace is collapsed to a single space.
    """
    text = opentext(fname)
    # re.DOTALL lets '.' cross line breaks, so a tag written over several
    # lines is removed as well instead of being left in the output.
    without_tags = re.sub(r'<.*?>', r' ', text, flags=re.DOTALL)
    return re.sub(r'\s+', r' ', without_tags)
def number(folder):
    """Append one "name<TAB>count" line per file of `folder` to sentences.txt.

    Every entry of `folder` is read as UTF-8 text; count is the number of
    matches of the pattern in that text. sentences.txt is opened in append
    mode in the current directory.
    """
    # NOTE(review): the pattern is empty in this copy of the source, so
    # findall() returns one empty match per position in the text - confirm
    # the intended regex.
    pattern = ''
    for entry in os.listdir(folder):
        with open(os.path.join(folder, entry), 'r', encoding = 'utf-8') as source:
            content = source.read()
        matches = re.findall(pattern, content)
        with open('sentences.txt', 'a', encoding = 'utf-8') as report:
            report.write(entry + '\t' + str(len(matches)) + '\n')
def opentext(text):
    """Return the whole content of the UTF-8 file at path `text` as one string."""
    with open(text, 'r', encoding = 'utf-8') as source:
        content = source.read()
    return content
def noun(number):
    """Return a random noun from the singular or the plural word list.

    Args:
        number: 's' selects snouns.txt; any other value selects plnouns.txt.

    Returns:
        One whitespace-separated word of the chosen file, or '' when the
        file contains no words.
    """
    fname = 'snouns.txt' if number == 's' else 'plnouns.txt'
    with open(fname, 'r', encoding = 'utf-8') as source:
        # split() without an argument splits on any whitespace run, so a
        # trailing newline or a doubled space no longer yields 'word\n' or
        # an empty entry the way split(' ') did.
        words = source.read().split()
    if not words:
        return ''
    return random.choice(words)
def make_text():
    """Print the five sentence templates once each, in random order.

    Replaces the hand-expanded decision tree that enumerated every ordering
    of sentence1() .. sentence5(). The random draws are made in the same
    sequence as in that tree: each of the first four positions is drawn
    with random.choice from the still unused numbers (kept in ascending
    order), and the sentence is printed before the next draw.
    """
    builders = {
        1: sentence1,
        2: sentence2,
        3: sentence3,
        4: sentence4,
        5: sentence5,
    }
    remaining = [1, 2, 3, 4, 5]
    while remaining:
        if len(remaining) > 1:
            pick = random.choice(remaining)
        else:
            # The last sentence is taken directly: the tree made no random
            # draw for it either.
            pick = remaining[0]
        remaining.remove(pick)
        print(builders[pick]())
# Create the folders 1..n in the current directory; folder i receives the
# files 1.txt .. i.txt, each containing a single space.
n = int(input('Введите число: '))
for i in range(1, n+1):
    folder = str(i)
    os.mkdir(folder)
    for j in range(1, i+1):
        # os.path.join inserts the separator of the running platform; a
        # literal backslash only addresses the sub-folder on Windows and
        # elsewhere becomes part of a file name in the current directory.
        with open(os.path.join(folder, str(j) + '.txt'), 'w', encoding="utf-8") as f:
            f.write(' ')
def freq_dict(arr):
    """Count how often each element occurs in `arr`.

    Returns:
        A plain dict mapping element -> number of occurrences, with keys in
        order of first occurrence. Looking up an absent key raises KeyError.
    """
    counts = {}
    for item in arr:
        counts[item] = counts.get(item, 0) + 1
    return counts
# Guessing game: the second word of a phrase is shown as a hint and the
# player types first words until the right one is entered.
d = {'Алые': 'Паруса',
     'Пластмассовый': 'Мир',
     'Белые': 'Розы',
     'Синее': 'Море',
     'Черный': 'Передел'}
key_list = list(d)
print("Подсказка: ")
# randrange(len(...)) yields a valid index 0..4; randint(1,5) never picked
# the first phrase and raised IndexError whenever it returned 5.
k = random.randrange(len(key_list))
print(d[key_list[k]])
count = 0
while(input() != key_list[k]):
    # One increment per wrong answer (it used to be incremented twice, so
    # the attempt numbers ran 1, 3, 5, ...).
    count+=1
    print("Попытка номер ",count)
print("Ура, вы выиграли!")
print(count)
def search(folder='C:/Users/Тимур/AppData/Local/Programs/Python/Python35-32'):
    """Print the entries of `folder` that are not regular files and have '_' or ' ' in the name.

    The full listing is printed first, then one 'file:' line per hit and
    finally the number of hits. A name containing both characters is
    reported and counted twice.

    Args:
        folder: directory to inspect; defaults to the path that used to be
            hard-coded in the body.
    """
    k = 0
    entries = os.listdir(folder)
    print(entries)
    for f in entries:
        # isfile() needs the full path: a bare name is resolved against the
        # current working directory, so regular files inside `folder` were
        # reported as if they were not files.
        if not isfile(os.path.join(folder, f)):
            if '_' in f:
                k = k + 1
                print('file: ', f)
            if ' ' in f:
                k = k + 1
                print('file: ', f)
    print(k)
def create_txt(dict):
    """Write the entries of `dict` to result.txt in the current directory.

    Every entry becomes a newline followed by the stripped key, a tab and
    the value converted with str(); the file therefore starts with an
    empty line and has no trailing newline.
    """
    with open("result.txt", "w", encoding="utf-8") as out:
        out.write("".join(
            "\n" + name.strip() + "\t" + str(total)
            for name, total in dict.items()
        ))
'utf-8') + f.write(n) + f.close +replace() +word = input('Введите слово : ') +for i in range (len(word)): + print(word[-i-1::]) +import os +from os.path import isfile +def search(): + folder = 'C:/Users/Тимур/AppData/Local/Programs/Python/Python35-32' + p = 0 + names = ['test'] + print(os.listdir(folder)) + for files in os.walk(folder): + for f in files: + for i in range(p): + if name[i] != (f.split('.')[0]): + names.append(f.split('.')[0]) + p += 1 + print(p) + for name in names: + print(name) +search() +def name(title): + with open (title+'.txt', 'r', encoding ='utf-8') as f: + text=f.read() + words=text.split(' ') + return words +def ous(): + words = name("title") + p = 0 + k = 0 + for i, word in enumerate (words): + if words[i].count("ous"): + p = p + len(words[i]) + k = k + 1 + print(p / k) +ous() +f = open('C:\\Users\\Тимур\\Desktop\\text.txt', 'r') +k = 0 +l = [line.strip() for line in f] +p = str (l) +x=p.count(" ") +x=x+1 +p=p.split() +for elem in p: + if len(elem) > 10: + k += 1 +percent = k / x * 100 +print(percent) +import random +def noun(): + file = open('noun.txt', 'r', encoding = 'utf8') + f = file.read() + nouns = f.split('\n') + return random.choice(nouns) +def conjunction(): + file = open('conjunction.txt', 'r', encoding = 'utf8') + f = file.read() + conjunctions = f.split('\n') + return ", "+random.choice(conjunctions) +def adjective(): + file = open('adjective.txt', 'r', encoding = 'utf8') + f = file.read() + adjectives = f.split('\n') + return random.choice(adjectives) +def verb(): + file = open('verb.txt', 'r', encoding = 'utf8') + f = file.read() + verbs = f.split('\n') + return random.choice(verbs) +def place(): + file = open('place.txt', 'r', encoding = 'utf8') + f = file.read() + places = f.split('\n') + return random.choice(places) +def part_SS(): + return adjective()+" " +noun()+" " +verb()+" "+place() +def SS(): + return (part_SS()+conjunction()+" "+part_SS()+".").capitalize() +def IfSP(): + return "в то время как " + adjective()+" " + 
noun() +" "+ verb() +" "+ place() + ", " + noun()+" "+verb() +def TimeSP(): + return "когда " + noun() +" "+ verb() + ", "+ adjective()+" "+ noun()+" "+verb() +def SP(): + ver = random.randint(1,2) + if (ver == 1): + return (IfSP()+".").capitalize() + else: + return (TimeSP()+".").capitalize() +print("УДИВИТЕЛЬНЫЙ ШЕДЕВР НАПИСАННЫЙ МАШИНОЙ") +for i in range(random.randint(5,10)): + sen = random.randint(1,2) + if(sen==1): + print(SS()) + else: + print(SP()) +print("НУ ВОТ И ВСЕ, РЕБЯТА") +import re +def search1(): + with open('plant.html', 'r', encoding = 'utf-8') as f: + found = [] + article = f.read() + f.close() + result2 = re.findall(r'Семейство:\ <\/td>\n

\n(.*))' +link = re.search(reg, content(name)) +link = ((re.search(('title="(.*)"'),link.group())).group()).strip('title="') +print("Отряд", link) +with open (r'C:\Users\Анна\Documents\GitHub\prog\PythonHW11\lingva.html', 'r', encoding = 'utf-8') as f: + content = f.read() +import re +article = re.sub(u'язык((а(х|ми?)?|у|о(м|в)|и|е)?[\s.,— ''""<>?!»():-;])', 'шашлык\\1', content) +article2 = re.sub(u'Язык((а(х|ми?)?|у|о(м|в)|и|е)?[\s.,— ''""<>?»!():-;])', 'Шашлык\\1', article) +with open ('new.txt', 'w', encoding='utf-8') as f: + f.write(article2) +def count_tf(word, text): + return text.count(word) / len(text) +def count_df(word, texts): + n = [1 for text in texts if word in text] + return sum(n) + +def count_idf(word, texts): + n = len(texts) / (1 + count_df(word, texts)) + return n + +from math import log +def count_tfidf(word, text, texts): + tf = count_tf(word, text) + idf = count_idf(word, texts) + return log(tf, 10) * log(idf, 10) +import re +punct = '[.,!«»?&@"$\[\]\(\):;% +def preprocessing(text): + text_wo_punct = re.sub(punct, '', text.lower()) + words = text_wo_punct.strip().split() + return words +import os +texts_dic = {} +for root, dirs, files in os.walk('wikipedia'): + for f in files[:50]: + with open(os.path.join(root, f), 'r', encoding='utf-8') as t: + text = preprocessing(t.read()) + texts_dic[f.split('.')[0]] = text +texts = list(texts_dic.values()) +for text in texts_dic: + for word in texts_dic[text]: + scores = {} + scores[word] = count_tf(word, texts_dic[text]) + if scores[word] >= 55: + texts_dic[text].pop(word) +for text in texts_dic: + print("Top words in document {}".format(text)) + scores = {} + for word in texts_dic[text]: + scores[word] = count_tfidf(word, texts_dic[text], texts) + sorted_words = sorted(scores.items(), key=lambda x: x[1]) + for word, score in sorted_words[:5]: + print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5))) +import os +import re +import codecs +f2 = open('table.csv', 'w', encoding = 'utf-8') 
+f2.write('Название текста' + ',' + 'Автор' + ',' 'Дата создания текста') +for file in os.listdir('.'): + if file.endswith('xhtml'): + with codecs.open(file, 'r', 'Windows-1251') as f: + a = f.read() + f.close() + r1 = re.search('(.*)', a) + r2 = re.search('', a) + r3 = re.search('', a) + if r1 and r2 and r3: + f2.write(r1.group(1) + ',' + r2.group(1) + ',' + r3.group(1)) +f2.close() + +import os +import re +import codecs +f1 = open('file_words.txt', 'w', encoding = 'utf-8') +for file in os.listdir('.'): + if file.endswith('xhtml'): + f = codecs.open(file, 'r', 'Windows-1251') + a = f.read() + f.close() + r = re.search('(.*)', a) + if r: + f1.write(r.group(1) + '\t' + str(len(re.findall('', a))) + '\n') + print('1') +f1.close() +import os +def func1(): + freqdict = {} + for root, dirs, files in os.walk('.'): + for d in dirs: + if d[0] in freqdict: + freqdict[d[0]] += 1 + else: + freqdict[d[0]] = 1 + return freqdict +def func2(freqdict): + x = 0 + for i in freqdict: + if freqdict[i] > x: + x = freqdict[i] + a = i + print ('название большинства папок начинается на ' + a) + return +func2(func1()) +import random +def length(string): + s1 = '' + i = 0 + for i in range(len(string)): + s1 += '.' + return s1 +def create_arr_and_dic(): + f = open("Слова и подсказки.csv", encoding = "utf-8") + a = f.readlines() + arr = [] + dic = {} + for line in a: + words = line.split(';') + x = words[0].strip('\ufeff') + arr.append(x) + dic[x] = words[1].strip('\n') + return arr, dic +array, dictionary = create_arr_and_dic() +y = random.choice(array) +print('Вот ваша подсказка:', y, length(y)) +z = input('Загаданное слово: ') +if z == dictionary[y]: + print('Правильно.') +else: + print('Увы, нет:(') +import re +def func1(): + d = {} + f = open('тестовый файл.txt', 'r', encoding = 'utf-8') + a = f.readlines() + for line in a: + line = re.sub('(\.\.?\.?|\?|!)(\n)? 
?', '.', line) + sentences = line.split('.') + for sentence in sentences: + if len(sentence) >= 1: + d[sentence] = {word.strip(): len(word.strip())\ + for word in sentence.split(' ')} + return d +print(func1()) +word=input('Введите слово ') +anotherword='' +sameword=word +print(word) +for i in range(len(word)-1): + anotherword=word[len(word)-i-1] + for k in range(len(sameword)-1): + anotherword+=sameword[k] + print(anotherword) + sameword=anotherword +f=open('text.txt', encoding="utf-8") +a=f.readlines() +z=0 +x=0 +m=0 + +n=0 +for line in a: + words=line.split() + for z in range(len(words)): + if words[z].endswith('.') or words[z].endswith(','): + x+=1 + m+=x + x=0 + n+=len(words) +print ((n-m)/n*100, '% слов в этом тексте не оканчиваются точкой или запятой') +import random +def actor3(): + slova=[] + f=open('actor3.txt', encoding="utf-8") + a=f.readlines() + z=0 + for line in a: + words=line.split() + for z in range(len(words)): + slova.append(words[z]) + z=0 + slovo=random.choice(slova) + slovo=slovo.capitalize() + return slovo +def adj2(): + slova=[] + f=open('adj2.txt', encoding="utf-8") + a=f.readlines() + z=0 + for line in a: + words=line.split() + for z in range(len(words)): + slova.append(words[z]) + z=0 + return random.choice(slova) +def line1(noun, adjective): + return noun + ' ' + adjective +def adverb2(): + slova=[] + f=open('adverb2.txt', encoding="utf-8") + a=f.readlines() + z=0 + for line in a: + words=line.split() + for z in range(len(words)): + slova.append(words[z]) + z=0 + slovo=random.choice(slova) + slovo=slovo.capitalize() + return slovo +def verb2(): + slova=[] + f=open('verb2.txt', encoding="utf-8") + a=f.readlines() + z=0 + for line in a: + words=line.split() + for z in range(len(words)): + slova.append(words[z]) + z=0 + return random.choice(slova) +def place2(): + slova=[] + f=open('place2.txt', encoding="utf-8") + a=f.readlines() + z=0 + for line in a: + words=line.split() + for z in range(len(words)): + slova.append(words[z]) + z=0 + 
return random.choice(slova) +def line2(adverb, verb, place): + return adverb + ' ' + verb + ' di ' + place + '.' +def actor2(): + slova=[] + f=open('actor2.txt', encoding="utf-8") + a=f.readlines() + z=0 + for line in a: + words=line.split() + for z in range(len(words)): + slova.append(words[z]) + z=0 + slovo=random.choice(slova) + slovo=slovo.capitalize() + return slovo +def verb3(): + slova=[] + f=open('verb3.txt', encoding="utf-8") + a=f.readlines() + z=0 + for line in a: + words=line.split() + for z in range(len(words)): + slova.append(words[z]) + z=0 + return random.choice(slova) +def line3(noun, verb): + return noun + ' ' + verb + '.' +def randomhaiku(): + haiku = line1(actor3(), adj2()) +\ + '\n' + line2(adverb2(), verb2(), place2()) +\ + '\n' + line3(actor2(), verb3()) + return haiku +print (randomhaiku()) +arr=[] +s=input('Ввведите латинское слово ') +if len(s)!=0: + arr.append(s) +while len(s)!=0: + s=input('Ввведите латинское слово ') + if s.endswith ('re') or s.endswith ('i')or s.endswith ('isse') \ + or s.endswith ('us esse') or s.endswith ('a esse') or s.endswith ('um esse') \ + or s.endswith ('um iri'): + arr.append(s) +for i in range (len(arr)): + print (arr[i]) +N=int(input('Введите число ')) +x=1 +while x!=N: + word=input('Введите слово ') + x+=1 + if word=='программирование': + break +print('Работа программы завершена') +import re +def func1(): + arr = [] + i = 0 + f = open("Текст с глаголом выпить.txt", encoding = "utf-8") + a = f.readlines() + for line in a: + words = line.split() + for i in range(len(words)): + words[i] = words[i].lower() + arr.append(words[i].strip('.,!?/\|()";:')) + f.close() + return arr +arr1 = [] +i = 0 +for i in range(len(func1())): + if re.search('вып((ей(те)?)|(ь(е((шь)|м|те?)|ют?))|(и((л(а|о|и)?)|(т(ь?|(ы(й|ми?|х|е))\ +|(ая?)|(о(е|(го)|й|му?)?)|(ую))))|в(ш((ая)|(ую)|и(й|ми?|х)|е(е|ю|му?)))?))', func1()[i]): + if func1()[i] not in arr1: + arr1.append(func1()[i]) + print(func1()[i]) +a=input('Введите число а') 
+b=input('Введите число b') +c=input('Введите число c') +a=int(a) +b=int(b) +c=int(c) +if a+b==c: + print ('a и b в сумме дают c') +else: + print ('a и b в сумме НЕ дают c') +if a/b==c: + print ('a разделить на b равно c') +else: + print ('a разделить на b НЕ равно c') +def func1(text_file): + ed = 0 + y = 0 + i = 0 + f = open(text_file, encoding = "utf-8") + a = f.readlines() + for line in a: + words = line.split() + for i in range(len(words)): + if words[i].endswith('ed'): + ed += 1 + if words[i].endswith('ied'): + y += 1 + arr = [] + arr.append(ed) + arr.append(y) + return arr +a = input('Введите название файла, который хотите открыть: ') +print('Количество форм на -ed в тексте: ', func1(a)[0], \ +'\nИз них образованы от глаголов на -y: ', func1(a)[1]) +import re +def func1(): + f = open("Ферма, Пьер — Википедия.html", encoding = "utf-8") + a = f.readlines() + i = 0 + for i in range(len(a)): + r1 = re.search("", a[i]) + r2 = re.search("
.+<\/a><\/td>', article) + print(result2) +search1() +my_file = open('corp.txt', 'r', encoding = 'UTF-8') +text = my_file.read() +arr = text.split('\n') +my_file.close() +new_file = open('new.txt', 'w', encoding = 'UTF-8') +quant = str(len(arr)) +new_file.write(quant) +new_file.close() +import re +my_file = open('corp.txt', encoding = 'UTF-8') +text = my_file.read() +arr = text.split() +my_file.close() +a = [] +b = [] +un = [] +count = 0 +for i in range(len(arr)): + if 'type' in arr[i] and 'lemma' in arr[i + 1]: + item = arr[i] + left_num = item.find('"') + right_num = item.rfind('"') + a.append(item[left_num + 1 : right_num]) +for i in range(len(a)): + for j in range(len(a)): + if a[i] == a[j]: + count += 1 + if a[i] not in un: + un.append(a[i]) + b.append(count) + count = 0 +my_dictionary = dict(zip(un, b)) +print(my_dictionary) +new_file = open(new.txt, 'w', encoding = 'UTF-8') +new_file.write(my_dictionary) +new_file.close() + + + + +def openfile(name): + my_file = open(name, encoding = 'UTF-8') + s = my_file.read() + name_list = s.split() + my_file.close() + return name_list +def un_check(arr = []): + count = 0 + for item in arr: + if item.startswith('un') or item.startswith('Un'): + count += 1 + return count +def length_check(cyph, arr = []): + count = 0 + for item in arr: + if len(item) > cyph: + count += 1 + res = (count / len(arr)) * 100 + return res +def starter(): + file_name = input() + un_number = un_check(openfile(file_name)) + crit = int(input()) + proc = length_check(crit, openfile(file_name)) + print (un_number, proc) +starter() + + + + + + + + + +import re +my_file = open('rask.txt', encoding = 'UTF-8') +text = my_file.read() +my_file.close() +regex = '[!?]' +new_text = re.sub(regex, '.', text) +sent = [item.split() for item in new_text.split('.')] +for words in sent: + unique = set() + for word in words: + word.strip('. 
, : -') + if words.count(word) > 1 and word not in unique: + unique.add(word) + print('{:^10} {:^10}'.format(word, words.count(word))) + + + + + + +import os +def openfile(file): + with open(file, encoding = 'UTF-8') as my_file: + text = my_file.read() + return set(text.split()) +def check(name): + flag1, flag2 = False, False + for item in name: + if item in openfile('cyr.txt'): + flag1 = True + elif item in openfile('lat.txt'): + flag2 = True + if flag1 and flag2: + return True + else: + return False +names = set() +count = 0 +for f in os.listdir(): + if check(f): + names.add(f) + if os.path.isdir(f): + count += 1 +print('Найдено таких папок: ', count, '\n' + 'Файлы и папки такого рода: ', *names) +from __future__ import print_function +import io +word = ' союз ' +with io.open('freq.txt', encoding='utf-8') as file: + for line in file: + if word in line1: + print(line, end='') +from __future__ import print_function +import io +word = ' жен ' +with io.open('freq.txt', encoding='utf-8') as file: + for line in file: + if word in line: + stop = ' ' + i = 0 + tail = len(line) + ipm = 0 + while line[i] != stop: + print(line[i], end = ' ') + i += 1 + +given_word = input('Введите, пожалуйста, любое слово: ') +while len(given_word) > 0: + print(given_word) + given_word = given_word[1:len(given_word) - 1] + + + + + +import random +def subj(): + subj_file = open('nouns.txt', encoding = 'UTF-8') + s = subj_file.read() + subj_list = s.split() + return random.choice(subj_list) + subj_file.close +def act(): + act_file = open('activities.txt', encoding = 'UTF-8') + s = act_file.read() + act_list = s.split() + return random.choice(act_list) + act_file.close() +def sylls_counter(word): + vowels = open('vowels.txt', encoding = 'UTF-8') + s = vowels.read() + mark = s.split() + vowels.close() + count = 0 + for item in word: + if item in mark: + count += 1 + return count +def obj(): + obj_file = open('objects.txt', encoding = 'UTF-8') + s = obj_file.read() + obj_list = s.split() + 
return random.choice(obj_list) + obj_file.close() +def puncmark(): + punc_file = open('punctuation.txt', encoding = 'UTF-8') + s = punc_file.read() + punc_list = s.split() + return random.choice(punc_list) + punc_file.close() +def composer(): + line = (subj().capitalize() + ' ' + act() + ' ' + obj()) + puncmark() + return line +def check7(w7): + if sylls_counter(w7) == 7: + return 1 + else: + return 0 +def check5(w5): + if sylls_counter(w5) == 5: + return 1 + else: + return 0 +def final_tanka(): + for i in range(5): + a = composer() + if i in [0, 2]: + c = 0 + while c != 1: + a = composer() + c = check5(a) + print(a) + else: + d = 0 + while d != 1: + a = composer() + d = check7(a) + print(a) +final_tanka() +print("Введите, пожалуйста,три любых числа") +a = int(input()) +b = int(input()) +c = int(input()) +if a/b == c: + print("Частное от деления числа a на число b равно числу c") +else: + print("Частное от деления числа a на число b не равно числу c") +if a**b == c: + print("Число a в степени b равно числу с") +else: + print("Число a в степени b не равно числу c") +print("Введите, пожалуйста,три любых числа") +a = int(input()) +b = int(input()) +c = int(input()) + + + + + + + +if a+b == c: + print("Сумма чисел a и b равна числу c") +else: + print("Сумма чисел a и b не равна числу c") + +if a*b == c: + print("Произведение чисел a и b равно числу c") +else: + print("Произведение чисел a и b не равно числу c") + +if a%b == c: + print ("Число a даёт остаток, равный числу c при делении на число b") +else: + print ("Число a не даёт остаток, равный числу c при делении на число b") + +if -1*b/a == c: + print("Число c является решением линейного уравнения ax+b") +else: + print("Число c не является решением линейного уравнения ax+b") + +if a/b == c: + print("Частное от деления числа a на число b равно числу c") +else: + print("Частное от деления числа a на число b не равно числу c") + +if a**b == c: + print("Число a в степени b равно числу с") +else: + print("Число a в 
степени b не равно числу c") +import re +t_file = open('Finnish.html', encoding = 'UTF-8') +lines = t_file.read() +t_file.close() +clearing = re.compile('<[/]?[a-z]*>') +lines = re.sub(u'<.*?>', u'', lines, flags = re.U) +p1 = re.compile('ISO 639-3\D', re.IGNORECASE) +p2 = re.compile('\n[a-z]{3}', re.IGNORECASE) +m1 = p1.search(lines) +m2 = p2.search(lines[m1.end():len(lines)]) +print(m1.group(), m2.group()) + + + + +import os +def file_counter(): + Max = 0 + count = 0 + for root, dirs, files in os.walk('.'): + for item in root: + if not item.startswith('.'): + count = len(files) + if count > Max: + name = os.path.join(root) + Max = count + count = 0 + print('Максимальное количество файлов -', str(Max) + ',', 'содержится в папке ', name) +file_counter() +import re +my_file = open('Birds.html', 'r', encoding = 'UTF-8') +text = my_file.read() +my_file.close() +text = re.sub(u'<.*?>&a-zA-Z', u'', text, flags = re.U) +arr = text.split() +var1 = re.compile('Птиц(а|ы|у|е|ам|ах|ами)') +var2 = re.compile('[^\w]птиц(а|ы|у|е|ам|ах|ами)') +var3 = re.compile('Птицей') +var4 = re.compile('[^\w]птицей') +s = '' +for item in arr: + if re.search(var1, item): + item = re.sub(u'Птиц', u'Рыб', item, flags = re.U) + elif re.search(var2, item): + item = re.sub(u'птиц', u'рыб', item, flags = re.U) + elif re.search(var3, item): + item = re.sub(u'Птицей', u'Рыбой', item, flags = re.U) + elif re.search(var4, item): + item = re.sub(u'птицей', u'рыбой', item, flags = re.U) +for item in arr: + s += ' ' + item + ' ' +new_file = open('Forms.txt', 'w', encoding = 'UTF-8') +new_file.write(s) + +import random + +def gettingfile(): + t=[] + with open ('словник.txt','r',encoding='utf-8') as f: + t=f.readlines() + return t + + +def getcomponent(tag): + a=[] + for i in gettingfile(): + if tag in i: + a.append(i[:i.find(tag)]) + return random.choice(a) + +def getline(tags): + s=[] + for tag in tags: + s.append(getcomponent(tag)) + capital=s[0][0] + if not capital.isupper(): + capital=capital.upper() + 
s[0]=capital+s[0][1:] + line=''.join(s) + return line[:len(line)-1] + +def getprep(line,iter): + if 'Зачем' in line: + return '?' + elif 'Как так' in line: + return '?!' + elif iter==3: + return '.' + else: + return ',' + + + +def main(): + wordtags=[['N.M.anim','V.TR-M','N.-ACC','N-INSTR'],['CLIT1','N-GEN','ADV','N1','V.INTR'],\ + ['ADRB1','N.M.anim','V.INTR','ADJ-M'],['CLIT2','ADV','V-INF','ADRB2','N.-ACC']] + for c in range(4): + l=getline(random.choice(wordtags)) + print(l+getprep(l,c)) + input() + return +if __name__=='__main__': + main() +import re +import os +import shutil +def printfilenames(path): + + printed = [] + counter = 0 + for i in os.listdir(path): + if os.path.isfile(i): + a = i[:i.rfind('.')] + if a not in printed: + print(i) + printed.append(a) + if not re.search('[0-9]',a): + counter += 1 + else: + if i not in printed: + print(i) + printed.append(i) + print('Количество файлов, не содержащих цифры в названии = ', counter) + return +def main(): + printfilenames('.') +if __name__ == '__main__': + main() +import random +def getdictionary(a): + with open(a,'r',encoding='utf-8') as f: + x=f.readlines() + d=dict() + for line in x: + line=[i for i in line.split(',')] + d[line[0]]=line[1:] + return d +def game(wordlist): + keys=[i for i in wordlist.keys()] + word=random.choice(keys) + t=True + + points=''.join(['.']*len(word)) + while t: + print(random.choice(wordlist[word])+' '+points+' ?') + ans=input().lower() + if ans==word: + print('Правильно !') + t=False + else: + print('Неправильно. 
Ещё одну попытку ?') + r=input().lower() + while r!='да': + if r=='нет': + return + else: + print('Неверный ввод, введите "да" или "нет" ') + r=input().lower() + return +def main(): + print('Сыграем в игру ?') + a=input().lower() + while a!='нет': + if a.lower()=='да': + + game(getdictionary('Слова и подсказки.csv')) + print('Cыграем ещё раз ?') + a=input().lower() + continue + elif a!='нет': + print('Неверный ввод, введите "да" или "нет" ') + a=input().lower() + print('До свидания !') +if __name__=='__main__': + main() +s=input() +for i in range(1,len(s)+1): + print(s[:i]) +import re + +def getsentences (filename): + try: + with open (filename, 'r', encoding = 'utf-8') as f: + text = f.read() + if text: + + smark = re.compile('\w.*?[.…!?\n]') + sentences = re.findall(smark, text) + if sentences: + sentences = [punctcut(i) for i in sentences] + sentences = [i for i in sentences if i] + else: + print('Предложений не найдено') + return sentences + else: + print ('Выбран пустой текстовый файл') + return False + except UnicodeDecodeError: + print('Неверная кодировка! 
Нужен файл в utf-8') + +def punctcut(s): + words = re.findall('\w*-?\w*',s) + if words: + words = [i for i in words if i] + return ' '.join(words) + else: + return '' +def main(): + upwordscheck = False + sentences = getsentences(input('Введите имя файла: ')) + if sentences: + for sentence in sentences: + if sentence.count(' ')>9: + upwords=[i for i in sentence.split(' ') if i.istitle()] + if upwords: + + upwordscheck = True + print('{:^20}'.format(' '.join(upwords))) + if not upwordscheck: + print('Слов с большой буквы в предложениях длиннее 10 слов не найдено') +if __name__ == '__main__': + main() +with open(input(),'r+', encoding='utf-8') as f: + + + allines=f.readlines() + minlen=len(allines[0]) + maxlen=0 + for i in allines: + if i and i!='\n': + if len(i)>maxlen: + maxlen=len(i) + if len(i)5: + print(i) +import re +def getwords(filename): + with open (filename,'r',encoding='utf-8') as t: + text=t.readlines() + words=set() + for line in text: + if line and line!='\n': + lw=[i.strip(',.!()[]{};:""?<>-\n') for i in line.split(' ')] + for w in lw: + if w: + words.add(w.lower()) + return words +def main(): + f=re.compile('на(ш(л[аои]|е(л|дший?)|ёл)|й(ти|д(и(те)?|ут?|[её](нн(ый|ое|ая)|шь|те?|м)|я)))') + words=getwords(input('Введите имя файла: ')) + forms=set() + for word in words: + if f.match(word)!=None: + forms.add(word) + for i in forms: + print(i) +if __name__=='__main__': + main() +import re +def getxt(): + with open(input('Введите имя входного файла: '),'r',encoding='utf-8') as f: + text=f.read() + return text +def printinf(s): + with open(input('Введите имя выходного файла: '),'w',encoding='utf-8') as f: + f.write(s) + return +def main(): + a=r'[\s\S]*?<.*?>Часовой пояс([\s\S]*?)' + b=r'(<.*?>)([^<>]*)' + timezone=re.search(a,getxt()).group(1) + onlywords=re.findall(b,timezone) + timezone='' + for i in onlywords: + timezone+=i[1] + printinf(timezone) +if __name__=='__main__': + main() +def getwords(filename): + with open (filename,'r',encoding='utf-8') as 
f: + text=f.read() + words=[i.strip(',!.";:()-«»') for i in text.split(' ')] + return words +def wordswithending(words,a): + m=[] + c=0 + for word in words: + if len(word)>len(a): + if word[len(word)-len(a):]==a: + if not word in m: + c+=1 + m.append(word) + print ('Количество разных слов с окончанием -'+a+' в тексте: ',c) + return m +def maxwordfrequency(m): + freqs=[] + for i in range(len(m)): + freqs.append(0) + for word in m: + if m[i]==word: + freqs[i]+=1 + maxind=0 + for i in range(len(freqs)): + if freqs[i]>maxind: + maxind=i + return m[maxind] +def main(): + x=maxwordfrequency(wordswithending(getwords(input('Введите имя файла: ')),'ness')) + print('Слово с максимальной частотой: ',x) +if __name__=='__main__': + main() +import os +import re +def savedict(d, filename): + with open(filename,'w', encoding='utf-8') as f: + for key in d: + line = key + '\t' + str(d[key])+'\n' + f.write(line) + return +def savearray(ar, filename): + with open(filename,'w', encoding='utf-8') as f: + for el in ar: + line = '\t'.join(el)+'\n' + f.write(line) + return +def findbigrams(t): + bigrams = [] + sentences = re.findall('[\w\W]+?', t) + for sentence in sentences: + bigram = '' + raw_sentence = re.sub('<.*?>','',sentence) + raw_sentence = re.sub('\n','',raw_sentence) + words = re.findall('[\w\W]+?', sentence) + for i in range(len(words)): + if ('gr="PR' in words[i]) and (i','',words[i])+' '+re.sub('<.*?>','',words[i+1]) + if bigram: + bigrams.append([bigram,raw_sentence]) + return bigrams +def task1(): + d = dict() + path = '.\\news' + for root, dirs, files in os.walk(path): + for file in files: + + with open (os.path.join(root,file), 'r', encoding = 'cp1251') as f: + txt = f.read() + d[file] = txt.count('') + savedict(d, 'sentencesnumber.csv') +def task2(): + lines = [] + lines.append(["Название файла", "Автор", "Тематика текста"]) + for root, dirs, files in os.walk(path): + for file in files: + nm = file + with open (os.path.join(root,file), 'r', encoding = 'cp1251') as f: + 
txt = f.read() + author = re.search(r'',txt).group(1) + topic = re.search(r'',txt).group(1) + lines.append([nm, author, topic]) + savearray(lines, 'authors_and_topics.csv') +def task3(): + corpus = '' + for root, dirs, files in os.walk(path): + for file in files: + + with open (os.path.join(root,file), 'r', encoding = 'cp1251') as f: + txt = f.read() + corpus += txt + bigrams = findbigrams(corpus) + savearray(bigrams, 'bigrams.csv') +def main(): + task1() + task2() + task3() +if __name__ == '__main__': + path = '.\\news' + main() +word='' + + +while word=='': + print('Введите 1 русское слово(без пробелов и знаков препинания):') + word=input() + if word: + for i in word: + if ord(i)<128 or 175241: + print('Слово может содержать только кириллические буквы без пробелов и др. знаков') + word='' + break + if word=='': + continue + for index,elem in enumerate(word): + if index%2==0 and (elem=='о' or elem=='п' or elem=='е'): + print(elem,end=' ') + else: + print('Нужно ввести слово') +print('Введите число a:') +a=int(input()) +print('Введите число b:') +b=int(input()) +print('Введите число c:') +c=int(input()) +d='не' +e='не' +if a*b==c: + d='' +if a*c+b==0: + e='' +print('произведение ',a,'и ',b,d,'равно ',c) +print(c,e,'является корнем уравнения ',a,'x+',b,'=0') +import re + +def task1(filename): + with open (filename, 'r', encoding='utf-8') as f: + l=len(f.readlines()) + with open ('Stringnumber.txt', 'w', encoding='utf-8') as t: + t.write('The number of strings is '+str(l)) + return + +def task2(filename): + d=dict() + with open(filename,'r',encoding='utf-8') as f: + text=f.read() + t=re.compile(r'(.*?)') + words=re.findall(w,text) + with open ('f.csv','w',encoding='utf-8') as f: + for i in words: + f.write(','.join(i)+'\n') + return + +def task3(filename): + d=dict() + with open(filename,'r',encoding='utf-8') as f: + text=f.read() + t=re.compile(r' mco: + mco = c + if not c == 0: + dct[root.split('\\')[len(root.split('\\'))-1]] = c + return dct, mco +def 
find_right_one(dct,mco): + ar = [] + for key in dct: + if dct[key] == mco: + ar.append(key) + return ar +def main(): + ar = find_right_one(find_folder()[0],find_folder()[1]) + print("Папка(-и), где больше всего файлов: " + " | ".join(ar)) + +if __name__ == "__main__": + main() +def getwords(): + f = open("hemingway.txt",'r') + ar = f.read().split() + f.close() + for i,word in enumerate(ar): + ar[i] = word.strip("!?.”,:;’\'\"-—“").lower() + return(ar) +def un(ar): + arr = [] + for word in ar: + if word.startswith('un'): + arr.append(word) + return(arr) +def verify(ar,inp): + amount = 0 + for word in ar: + if len(word)>inp: + amount += 1 + return str(amount/len(ar)*100)+"%"+" of words are longer than " + str(inp) + " letters." +def main(): + while True: + inp = int(input("Enter a number. Enter 0 to close the program. ")) + if inp > 0: + print(verify(un(getwords()),inp)) + else: + print("Bye!") + break +if __name__ == "__main__": + main() +import random + + +def open_file(): + f = open("d.csv","r") + ar = [] + for line in f.readlines(): + ar.append(line.split()) + f.close() + return ar +def create_d(ar): + d = {} + for line in ar: + if line[0] == "inv": + d[line[2]] = line[1] + "*" + elif line[0] == "n": + d[line[1]] = line[2] + return d +def verify(word): + inp = input("Угадайте слово: ") + if inp == word: + print(congr(True)) + else: + print(congr(False)) +def congr(sw): + pos = ["Вы угадали!","Ура, Вы угадали!","Угадали...","Наконец-то! Вы угадали!","Неужели вы -- угадали?.."] + neg = ["Не угадали, попробуйте еще.","Неправильно.","Увы -- неправильно...","Вовсе нет, пробуйте еще."] + if sw: + return random.choice(pos) + else: + return random.choice(neg) +def guess(d): + key = random.choice(list(d.keys())) + ar = [key,d[key]] + if ar[1].endswith("*"): + print(ar[1].strip("*") + "...") + verify(ar[0]) + else: + print("..." + ar[1]) + verify(ar[0]) +def main(): + while True: + guess(create_d(open_file())) + if input("Хотите продолжить? 
Сделайте пустой ввод, если хотите. Если нет -- введите что-нибудь: ") != "": + print("До свидания!") + break +if __name__ == "__main__": + main() + +import re +def getar(): + f = open("download-excel.xml","r",encoding = "utf8") + ar = f.read() + f.close() + return ar +def count_lines(ar): + f = open("out1.txt","w",encoding="utf8") + f.write(str(len(ar.split("\n")))) + f.close() +def dct_morph(ar): + arr = re.findall("(?:.*?)",ar) + dct = {} + for key in arr: + if key not in dct: + dct[key] = 1 + else: + dct[key] += 1 + return dct +def dct_morph_out(dct): + f = open("out2.txt","w",encoding="utf8") + st = "" + for key in dct: + st = st + str(key) + "\n" + f.write(st) + f.close() +def find_adj(text): + arr = re.findall("(?:.*?)",text) + dct = {} + for key in arr: + if key not in dct: + dct[key] = 1 + else: + dct[key] += 1 + return dct +def find_adj_out(dct): + f = open("out3.txt","w",encoding="utf8") + st = "" + for key in dct: + st = st + str(key) + " " + str(dct[key]) + "\n" + f.write(st) + f.close() +def look_better(text): + arr = re.findall("(.*?)",text) + st = "" + for el in arr: + st = st + el[0] + "," + el[1] + "," + el[2] + "\n" + text1 = re.sub("(.|\n)*","\n"+st+"\n",text) + f = open("out4.csv","w",encoding="utf8") + f.write(text1) + f.close() +def main(): + while True: + st = input('Введите 1 для 1 п., 2 для 2 п., 3 для 3 п. 
задания, любой другой символ для выхода:') + if st == "1": + count_lines(getar()) + elif st == "2": + dct_morph_out(dct_morph(getar())) + elif st == "3": + find_adj_out(find_adj(getar())) + look_better(getar()) + + else: + print("До свидания.") + break +if __name__ == "__main__": + main() + + + + + + +import re +def gettext(): + f = open("1.html",'r',encoding="utf8") + ar = f.read() + f.close() + return ar +def findcode(st): + reg = "(http://www-01\.sil\.org/iso639-3/documentation\.asp\?id=)([a-z][a-z][a-z])" + matches = re.findall(reg,st) + return matches[0][1] +def main(): + print("Код этого языка: "+findcode(gettext())) +if __name__ == "__main__": + main() +import re +def getar(): + f = open("1.txt",'r',encoding="utf8") + ar = f.read().split() + f.close() + return ar +def normalize(ar): + punct = "!?.,:;\'\"-—" + arr = [] + for word in ar: + word = word.strip("!?.,:;\«»'\"…-—()][*1234567890").lower() + if word != "": + arr.append(word) + return arr +def findverb(ar): + arr = [] + for word in ar: + if re.match("(сиде?(л|ть|в|я))|(сиж(у|ива))|(сиди(те)?)",word): + arr.append(word) + return arr +def main(): + print(" ".join(findverb(normalize(getar())))) +if __name__ == "__main__": + main() + + + + + +def getar(inp): + f = open(inp + ".csv",'r',encoding="utf-8") + a = f.read().split("\n") + b = [] + for el in a: + b.append(el.split(";")) + return b +def add_feature(arX,arY): + ar1 = arX + ar2 = arY + ar = [] + arr = [] + + for line2 in ar2: + for line1 in ar1: + if line2[0] == line1[1]: + ar.append("*".join(line1)+"*"+line2[1]) + for line in ar: + arr.append(line.split("*")) + for i in range(0,100): + print(arr[i]) + return arr +def getlines(ar): + lines = [] + lines1 = [] + for i in range(0,len(ar[1])): + lines.append("") + for n in range(0,len(ar[1])): + for line in ar: + if not n in [4,5]: + lines[n] += "\"{}\", ".format(line[n]) + elif n in [4,5]: + lines[n] += "{}, ".format(line[n]) + for line in lines: + lines1.append(line[:-2]) + return lines1 +def outp(ar): 
+ + + + f = open("output.txt",'w+',encoding="utf-8") + st = "locat <- data.frame(language = c({}), dialect = c({}), latitude = c({}), longitude = c({}), feature = c({}))".format(ar[1],ar[2],ar[3],ar[4],ar[5]) + f.write(st) +outp(getlines(add_feature(getar("csv-template"),getar("features")))) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +def getar(inp): + f = open(inp + ".csv",'r',encoding="utf-8") + a = f.read().split("\n") + b = [] + for el in a: + if not el == "": + b.append(el.split(";")) + for el in b: + for ell in el: + if "\ufeff" in ell: + print(ell) + ell = ell.replace("\ufeff","") + return b +def add_feature(arX,arY): + ar1 = arX + ar2 = arY + ar = [] + arr = [] + + + + + + + for l1 in ar1: + for l2 in ar2: + if l1[3] == l2[0]: + ar.append("*".join(l1)+"*"+l2[1]) + for line in ar: + arr.append(line.split("*")) + + + return arr +def out2(ar): + arr = [] + for el in ar: + arr.append(";".join(el)) + st = "\n".join(arr) + f = open("output2.csv","w+",encoding="utf-8") + f.write(st) + f.close() +def getlines(ar): + lines = [] + lines1 = [] + for i in range(0,len(ar[1])): + lines.append("") + for n in range(0,len(ar[1])): + for line in ar: + if not n in [4,5]: + lines[n] += "\"{}\", ".format(line[n]) + elif n in [4,5]: + lines[n] += "{}, ".format(line[n]) + for line in lines: + lines1.append(line[:-2]) + return lines1 +def outp(ar): + + + + f = open("output.txt",'w+',encoding="utf-8") + st = "locat <- data.frame(language = c({}), dialect = c({}), latitude = c({}), longitude = c({}), feature = c({}))".format(ar[1],ar[2],ar[3],ar[4],ar[5]) + f.write(st) +def main(): + print("fawfwa") + out2(add_feature(getar("allvillages"),getar("features"))) +if __name__ == "__main__": + main() + + + + + + + + + + + + + + + + + + + + + + + + + +import os +import re + +def open_file(name): + f = open(name,"r") + txt = f.read() + f.close() + return re.findall(".*[.,;:?!]*", txt) +def write_to_file(st,filename): + f = open(filename,"w+",encoding = "utf-8") + 
f.write(st) + f.close() +def count_words_infile(): + st = "" + for roots,dirs,files in os.walk("."): + for file in files: + if file.endswith(".xhtml"): + st = st + file + "\t"+ str(len(open_file(os.path.join(roots,file)))) + "\n" + write_to_file(st,"output_1.txt") +def find_data(): + fd = [] + for roots,dirs,files in os.walk("."): + for file in files: + if file.endswith(".xhtml"): + with open(os.path.join(roots,file)) as text: + if file.endswith(".xhtml"): + t = text.read() + fd.append([file,"".join(re.findall("",t)),\ + "".join(re.findall("",t))]) + st = "Название файла;Автор;Год создания\n" + for line in fd: + st = st + ";".join(line) + "\n" + write_to_file(st,"output_2.csv") +def find_bigrams(): + bigrams = [] + for roots,dirs,files in os.walk("."): + for file in files: + if file.endswith(".xhtml"): + ar = open_file(os.path.join(roots,file)) + for i,word in enumerate(ar): + if re.search("(.+)<",word).group(1)+" " +re.search("ana>(.+)<",ar[i+1]).group(1) + bigrams.append(bigram) +def cleanse(): + text = "" + for roots,dirs,files in os.walk("."): + for file in files: + if file.endswith(".xhtml"): + ar = open_file(os.path.join(roots,file)) + for word in ar: + text = text + " " + "".join(list(re.findall("ana>(.+)<.*([.,;:?!])?",word))) + write_to_file(text,"test_cleanse.txt") +def main(): + find_bigrams() + find_data() + count_words_infile() +main() + + + + + +def main(): + num = int(input("Enter a number ")) + nump = 2 + while num >= nump: + print(nump) + nump *= 2 +main() +def main(): + st = input("Enter a string: ") + print(st) + while len(st) > 1: + st = st[1:len(st)-1] + print(st) +main() + + +import re +def gettext(): + f = open("1.txt",'r',encoding="utf8") + ar = f.read() + f.close() + return ar +def repl(text): + s1 = re.sub("Птиц(|а|ы|у|е|ей|ам|ами|ах)","Рыб\\1",(re.sub("птиц(|а|ы|у|е|ей|ам|ами|ах)","рыб\\1",text))) + return(s1) +def savetext(text): + f = open("2.txt","w",encoding="utf8") + f.write(text) + f.close() +def main(): + savetext(repl(gettext())) 
+if __name__ == "__main__": + main() + +def intver(): + a = input("Enter the number:") + while not a.isdigit(): + a = input("You shouldn't enter any letters. Enter again:") + a = int(a) + return a +a = intver() +b = intver() +c = intver() +if a/b == c: + print("a/b=c") +else: + print("a/b doesn't equal c") +if a**b == c: + print("a^b=c") +else: + print("a^b doesn't equal c") +import re +def gettext(): + f = open("1.txt",'r',encoding="utf8") + ar = f.read() + f.close() + return ar +def splitting(s): + s1 = re.sub("([А-Я]|род)\.","\\1&&&",s) + s1 = re.sub("(\.|!|\?)","\\1 + ar = s1.split(" + for i in range(0,len(ar)): + ar[i] = ar[i].replace("&&&",".") + return ar +print("\n".join(splitting(gettext()))) + + + + + + + + + + + +import random +def file_op(name): + f = open(name,'r') + ar = f.read().split(' ') + f.close() + return(ar) +def adverb(): + ar = file_op("adverbs.txt") + return random.choice(ar) + ' ' +def int_clit(): + + ar = file_op("int_clitics.txt") + return random.choice(ar) + ' ' +def verb(): + + ar = file_op("verbs.txt") + return random.choice(ar) + ' ' +def name_fem(): + + ar = file_op("name_fem.txt") + return random.choice(ar) + ' ' +def adj_fem1(): + + ar = file_op("adj_fem1.txt") + return random.choice(ar) + ' ' +def adj_fem2(): + + ar = file_op("adj_fem2.txt") + return random.choice(ar) + ' ' +def prepositions1(): + + ar = file_op("prepositions1.txt") + return random.choice(ar) + ' ' + + + +def names_prcaus(q): + + if q == "sg": + ar = file_op("names_prcaus_sg.txt") + return random.choice(ar) + ' ' + elif q == "pl": + ar = file_op("names_prcaus_pl.txt") + return random.choice(ar) + ' ' +def adj_prcaus(q): + if q == "sg": + ar = file_op("adj_prcaus_sg.txt") + return random.choice(ar) + ' ' + elif q == "pl": + ar = file_op("adj_prcaus_pl.txt") + return random.choice(ar) + ' ' +def name_gen(): + + ar = file_op("name_gen.txt") + return random.choice(ar) + ' ' +def adj_gen(): + + ar = file_op("adj_gen.txt") + return random.choice(ar) + ' ' +def 
name_nom(): + ar = file_op("name_nom.txt") + return random.choice(ar) + ' ' +def adj_nom(): + ar = file_op("adj_nom.txt") + return random.choice(ar) + ' ' +def name_gen_sg(): + ar = file_op("name_gen_sg.txt") + return random.choice(ar) +def punct(): + ar = file_op("punct.txt") + return random.choice(ar) +def verse1(): + return int_clit() + adverb() + verb() +def verse2(): + if random.randint(1,2) == 1: + return(name_fem() + adj_fem2()) + return(adj_fem1() + name_fem()) +def verse3(): + if random.randint(1,2) == 1: + return "в " + names_prcaus("sg") + adj_prcaus("sg") + return "в " + names_prcaus("pl") + adj_prcaus("pl") +def verse4(): + return prepositions1() + name_gen() + adj_gen() +def verse5(): + return adj_nom() + name_nom() + name_gen_sg() + punct() +def main(): + print("Ныне забытый поэт-символист В.К. в начале 20 в. написал цикл стихотворений танка; цикл посвящен В.Я. Брюсову. \nВ.К. \nПять танок.") + for i in range(1,6): + print(i) + print(verse1().capitalize() + "\n" + verse2().capitalize() + "\n" + verse3().capitalize() + "\n" + verse4().capitalize() + "\n" + verse5().capitalize()) + print("1908 г.") +main() + + + + + +def task1(): + f = open('freq.txt','r',encoding = "UTF-8") + line = [] + for i in f: + line = i.split(' | ') + if line[1] == 'союз': + print(' | '.join(line)) + f.close() +def task2(): + f = open('freq.txt','r',encoding = "UTF-8") + line = [] + nom = [] + st = "" + summ = 0 + for i in f: + line = i.split(' | ') + nom = line[1].split() + if nom[0] == "сущ" and len(nom)==5: + if nom[2] == "ед" and nom[3] == "жен": + st += line[0] + ", " + summ += float(line[2]) + f.close() + print(st) + print("Суммарный ipm: ", summ) +def npt(): + + ary = [] + while True: + np = input("Enter a word").lower() + if np == "": + print("Конец ввода") + break + ary.append(np) + return ary +def task3(): + ary = npt() + f = open('freq.txt','r',encoding = "UTF-8") + line = [] + dictio = [] + ver = False + for i in f: + dictio.append(i.split(" | ")) + for word in ary: 
+ for i in range(len(dictio)): + ver = False + if word == dictio[i][0]: + print("Для слова " + "\"" + word + "\"" + ": "+ dictio[i][1]+ " | "+ dictio[i][2]) + ver = True + break + if not ver: + print("Слова " + "\""+ word +"\" " + "в словаре нет.") + f.close() + +def main(): + while True: + a = int(input("Введите номер задания: ")) + if a == 1: + task1() + elif a==2: + task2() + elif a==3: + task3() + else: + break +if __name__ == '__main__': + main() +def main(): + f = open('1.txt','r') + summ = 0 + per = 0 + ary = [] + for line in f: + summ += 1 + ary = line.split() + i = -1 + for word in ary: + i += 1 + if ary[i] in "-?!,.:;()\"'": + ary.pop(i) + if len(ary) > 5: + per += 1 + print(str(per/summ*100)+"%") + f.close() + +if __name__ == '__main__': + main() +def npt(): + ar = [] + for i in range(8): + ar.append(input("Enter a string: ")) + return ar +def main(): + ar = npt() + for i in range(0,7,2): + print(ar[i]+ar[i+1]) +main() +import os +def cyr_lat(cyr,lat,st): + iscyr = False + islat = False + for letter in st: + if not iscyr and letter in cyr: + iscyr = True + elif not islat and letter in lat: + islat = True + if iscyr and islat: + return True +def find_folders(): + ar = os.listdir('.') + c = 0 + for sth in ar: + if os.path.isdir(sth) and cyr_lat('абвгдеёжзийклмнопрстуфхцчшщъыьэюя','abcdefghijklmnopqrstuvwxyz',sth): + c += 1 + return c +def nrep_list(): + ar = os.listdir('.') + dct = {} + for el in ar: + if os.path.isdir(el): + dct[el]=1 + else: + for i in range(0,len(el)): + if el[len(el)-1-i] == ".": + dct[el[:len(el)-1-i]]=1 + break + print('\n'.join([key for key in dct])) +def main(): + print("{} пап(ка|ки|ок) с кириллическими и латинскими символами. 
\nНеповторяющиеся имена файлов/папок:".format(find_folders())) + nrep_list() +if __name__ == '__main__': + main() +import re +def getar_sentences(): + f = open("solj.txt",'r',encoding="utf-8") + st = f.read() + ar = [re.sub("\n"," ",re.sub("[,;\"}:-]",'',sentence).lower()) for sentence in re.split('[!?.]', st) if sentence != ''] + f.close() + return ar +def freq(word,ar): + count = 0 + for w in ar: + if w == word: + count += 1 + return count +def count_rep(ar): + arr = [sentar.split() for sentar in ar] + for sentence in arr: + d = {word:freq(word,sentence) for word in sentence} + for key in d: + if d[key] > 1: + print('{}{:^20}'.format(key,d[key])) + for sentence in arr: + for word in sentence: + if not word in d: + d[word] = 1 + else: + d[word] += 1 +def main(): + count_rep(getar_sentences()) +if __name__ == "__main__": + count_rep(getar_sentences()) +main() +import random +def words(file): + f = open(file, "r", encoding = "utf(8)") + f1 = f.read() + arr = f1.split() + f.close() + return arr +def noun(number): + singular = words("sing_nouns.txt") + plural = words("pl_nouns.txt") + if number == 's': + return random.choice(singular) + return random.choice(plural) +def noun2(): + nouns = words("nouns2.txt") + return random.choice(nouns) +def punctuation(): + marks = [".", "?", "!", "..."] + return random.choice(marks) +def verb(syllables): + verbs2 = words("verbs2.txt") + verbs3 = words("verbs3.txt") + if syllables == 2: + return random.choice(verbs2) + return random.choice(verbs3) +def noun_phrase(): + clitics = words("clitics.txt") + clitic = random.choice(clitics) + noun1 = noun('s') + return clitic + ' ' + noun1 +def verse1(): + return noun('s') + ' ' + verb(3) + punctuation() +def verse2(): + return noun('s') + ' ' + verb(2) + ' ' + noun('pl') + punctuation() +def verse3(): + return verb(3) + ' ' + noun('s') + punctuation() +def verse4(): + return noun_phrase() + ' ' + verb(2) + ' ' + noun2() + punctuation() +def make_verse(syllables): + if syllables == 5: + 
verse = random.choice([1,3]) + if verse == 1: + return verse1() + else: + return verse3() + else: + verse = random.choice([2,4]) + if verse == 2: + return verse2() + else: + return verse4() +print(make_verse(5)) +print(make_verse(7)) +print(make_verse(5)) +print(make_verse(7)) +print(make_verse(7)) +import os +def num_files(path): + c = 0 + for f in os.listdir(path): + if os.path.isfile(os.path.join(path, f)): + c += 1 + return c +def most_files(path): + n = num_files(path) + name = path + for root, dirs, files in os.walk(path): + for d in dirs: + new = os.path.join(root, d) + c = num_files(new) + if c > n: + n = c + name = d + return name +print(most_files('.')) +f = open("isl.xml", "r", encoding = "utf(8)") +d = {} +for line in f: + if "" not in line: + count +=1 + line = f.readline() +f.close() +f1 = open("res.txt", "w", encoding = "utf(8)") +f1.write(str(count)) +f1.close() +import re +f = open("isl.xml", "r", encoding = "utf(8)") +f1 = open("res2.txt", "w", encoding = "utf(8)") +for line in f: + m = re.search(r'type="f.h\w*">', line) + if m != None: + s = line[m.end():] + n = re.search(r'\w*', s) + if n != None: + f1.write(n.group() + ', ') +f1.close() +f.close() +f = open("isl.xml", "r", encoding = "utf(8)") +f1 = f.read() +a = re.search("", f1) +b = re.search(r"\s*", f1) +f2 = f1[a.end()+1:b.start()] +dtags = re.sub(r'<.*?>', '', f2, flags = re.DOTALL) +f.close() + +import random +d = {} +file = open("words.csv", "r") +for line in file: + line = line.strip('\n') + arr = line.split(';') + d[arr[0]] = arr[1] +file.close() +a = [] +for key in d: + a.append(key) +word = random.choice(a) +print("Подсказка:", word, "...") +noun = input() +if noun == d[word]: + win = ["ура!", "вы отгадали", "победа"] + print(random.choice(win)) +else: + lose = ["вы не отгадали", "вы проиграли", "попробуйте еще раз"] + print(random.choice(lose)) +import re +import os +def sentences(text): + text1 = re.split('[.?!]', text) + return text1 +def lengths(folder): + d = {} + for f in 
os.listdir(folder): + text = open(os.path.join(folder, f), 'r') + s = text.read() + text.close() + m = re.sub(u'<.*?>', u'', s, flags = re.DOTALL) + s1 = sentences(m) + d[f] = len(s1) + return d +def write_lengths(d): + f = open('text.txt', 'w') + for key in d: + f.write(key + '{:>6}'.format(d[key]) + '\n') + f.close() +folder = 'news' +d = lengths(folder) +write_lengths(d) +line = input() +j = len(line) - 1 +k = 1 +for i in range(len(line)//2): + print(line[k:j:]) + j = j-1 + k = k+1 +file_name = input("Введите путь к файлу: ") +f = open(file_name, "r", encoding = "utf(8)") +lines = 0 +i = 0 +for line in f: + arr = line.split() + if len(arr) > 5: + i += 1 + lines += 1 +percentage = i/lines * 100 +print(percentage, "% строк содержит больше 5 строк.") +f.close() +import re +f = open("Птицы.html", "r", encoding = "utf(8)") +f1 = f.read() +f.close() +m = re.sub(u"\\bптиц(?=(\\b|(ы|у|а(х|ми?)?|е)\\b))", "рыб", f1) +k = re.sub(u"\\bПтиц(?=(\\b|(ы|у|а(х|ми?)?|е)\\b))", "Рыб", m) +m = re.sub(u"\\bПтицей\\b", "Рыбой", k) +k = re.sub(u"\\bптицей\\b", "рыбой", m) +f = open("new.html", "w", encoding = "utf(8)") +f.write(k) +f.close +import re +rex = r"\bси(жу|д(я(т|щ(и(й|е|х|ми?)|е(го|му?|й|е)|ая|ую))?|и(шь|т(е|ся)?)?|е(ть|л(а|о(сь)?|и)?|в(ш(и(й|е|х|ми?)?|е(го|му?|й|ю|е)|ая|ую))?)))\b" +file = input("Введите название файла: ") +f = open(file, "r", encoding = "utf(8)") +f1 = f.read() +f1 = f1.lower() +arr = f1.split() +for i, word in enumerate(arr): + arr[i] = word.strip('.,:;&!«»()/-') +f.close() +found = [] +for word in arr: + m = re.search(rex, word) + if m != None: + if word not in found: + print(word) + found.append(word) +a = [] +for i in range(8): + new_element = input() + a.append(new_element) +for i in range(0, 8, 2): + print(a[i], a[i+1], sep = '') +import re +def sentences(text): + text1 = re.split('[.?!]', text) + return text1 +def del_punct(text): + text1 = [re.sub('[^\w\s]', '', sentence) for sentence in text] + return text1 +def count(text): + for sentence in 
text: + words = sentence.split() + arr = [] + for word in words: + cnt = 0 + if word not in arr: + arr.append(word) + for w in words: + if word == w: + cnt += 1 + if cnt > 1: + print(word, '{:^30}'.format(cnt)) +f = open('собачка.txt', 'r', encoding = 'utf(8)') +f1 = f.read() +f1 = f1.lower() +s = sentences(f1) +d = del_punct(s) +count(d) +a = int(input("Введите число a: ")) +b = int(input("Введите число b: ")) +c = int(input("Введите число c: ")) +if a / b == c: + print(a, "разделить на", b, "равно", c) +else: + print(a, "разделить на", b, "не равно", c) +if a ** b == c: + print(a, "в степени", b, "равно" , c) +else: + print(a, "в степени", b, "не равно", c) +n = int(input("Введите число: ")) +i = 0 +while 2**i <= n: + print(2**i) + i+=1 +import os +import re +def cyr_lat(path): + names = os.listdir(path) + count = 0 + for name in names: + path1 = os.path.join(path, name) + if os.path.isdir(path1): + if re.search('[a-zA-Z]', name) and re.search('[а-яА-Я]', name): + count+=1 + return count +n = cyr_lat('.') +print(os.listdir('.')) +print(n) +def words(): + file = input("Название файла: ") + f = open(file, "r", encoding = "utf(8)") + f1 = f.read() + f1 = f1.lower() + arr = f1.split() + for i, word in enumerate(arr): + arr[i] = word.strip('.,:;!?"()') + f.close() + return arr +def un(): + arr = words() + arr1 = [] + for word in arr: + if word[0] == "u" and word[1] == "n": + arr1.append(word) + return arr1 +def percentage(): + arr = un() + un_number = len(arr) + print(un_number, "слов с приставкой un") + count = 0 + length = int(input("Cлова длиннее, чем: ")) + for word in arr: + if len(word) > length: + count += 1 + percent = count/un_number * 100 + return percent +number = percentage() +print(number, "%") +import os +def countfiles(path): + dic = {} + for root, dirs, files in os.walk('.'): + dic[len(files)] = root + return dic[sorted(list(dic.keys()))[len(list(dic.keys()))-1]] +def main(): + print(countfiles('.')) +main() +arr=[] +for i in range(8): + inpstr=input() 
+ arr.append(inpstr) +for i in range(4): + print(arr[i*2]+arr[i*2+1]) +a={} +total=0 +with5=0 +with open('intext.txt', 'r', encoding='utf-8') as f: + lines = f.readlines() + for line in lines: + total=total+1 + a = line.split(' ') + if len(a)>5: + with5=with5+1 +print(with5/total*100) +istr=input() +for i in range(len(istr)//2+len(istr)%2): + print(istr[i:len(istr)-i]) +import re +def getarray(filename): + wordarr=[] + with open(filename, 'r', encoding='utf-8') as f: + lines = f.readlines() + for line in lines: + linewords=line.split() + for word in linewords: + wordarr.append(cleanword(word)) + return wordarr +def cleanword(word): + word=word.lower() + falsechars = [] + for i in range(len(word)): + if re.search("[a-яё]",word[i]) == None: + falsechars.append(word[i]) + for char in falsechars: + word = word.replace(char,"") + return word +def searchforms(cleanedarray): + for word in cleanedarray: + if re.match("си(жу|ди(шь|м|те?)?|де(л[аои]?|в(ш(ая|е(му?|е|го|й)|ую|и(й|х|е|ми?)?))?|ть)|дя(т|щ(ая|е(му?|е|го|й)|ую|и(й|х|е|ми?)))?)\Z",word) != None: + print(word) +searchforms(getarray("gulag.txt")) +with open('aphor.txt', 'r', encoding='utf-8') as f: + lines = f.readlines() + for line in lines: + if len(line.split()) < 17: + print (line) +resstr="" +aphors=0 +with open('aphor.txt', 'r', encoding='utf-8') as f: + lines = f.readlines() + for line in lines: + splitted = line.split() + for word in splitted: + if word[len(word)-1]=="." 
or word[len(word)-1]=="," or word[len(word)-1]=="?": + word=word[:-1:] + word=word.lower() + if word=="ум": + aphors=aphors+1 + if resstr!="": + resstr+=", " + resstr+=splitted[len(splitted)-1] + break +print('Цитат, содержащих слово "ум": '+str(aphors)) +print(resstr) +print() +print("Введите слова:") +wordarr=[] +while True: + i=input() + if i=="": + break + wordarr.append(i) +with open('aphor.txt', 'r', encoding='utf-8') as f: + lines = f.readlines() + for inpword in wordarr: + print(inpword+":") + printed=False + inpword=inpword.lower() + for line in lines: + splitted = line.split() + for word in splitted: + if word[len(word)-1]=="." or word[len(word)-1]=="," or word[len(word)-1]=="?": + word=word[:-1:] + word=word.lower() + if word==inpword: + printed=True + print(line[:-1:]) + break + if not printed: + print("Слово "+inpword+" в цитатах не найдено") + print() +import os +import re +def getfolders(): + folder = os.listdir() + dirsdict = {} + dirs = 0 + for entity in folder: + if os.path.isdir(entity) and re.search(r'[a-zA-Z]',entity) and re.search(r'[а-яёА-ЯЁ]',entity): + dirs += 1 + if entity not in dirsdict: + print(entity) + print() + print (dirs,'directories total found.') +getfolders() +print("Введите число: ") +n=int(input()) +i=1 +while n>=i: + print(i); + i=i*2 +def getarray(filename): + wordarr=[] + with open(filename, 'r', encoding='utf-8') as f: + lines = f.readlines() + for line in lines: + linewords=line.split() + for word in linewords: + wordarr.append(cleanword(word)) + return wordarr +def cleanword(word): + word=word.lower() + falsechars = [] + for i in range(len(word)): + if ord(word[i]) < 97 or ord(word[i]) > 123: + falsechars.append(word[i]) + for char in falsechars: + word = word.replace(char,"") + return word +def get_value_and_percentage(arrayname, minlength): + unWords = 0 + unWordsByLength = 0 + for word in arrayname: + if word[:2] == "un": + unWords = unWords + 1 + if len(word) > minlength: + unWordsByLength = unWordsByLength + 1 + 
print("Слов с приставкой un-: ",unWords) + if unWords > 0: + print("Процент слов с количеством символов больще ", minlength,": ",unWordsByLength/unWords*100) + else: + print("Процент слов с количеством символов больще ", minlength,": ",0) +fpath="Austen Jane.txt" +inplength = int(input("В искомых словах символов должно быть больше чем: ")) +print("Анализируем файл ",fpath,"...") +get_value_and_percentage(getarray(fpath), inplength) +import random +import re +def getdict (filepath): + dic = {} + with open(filepath, 'r', encoding='utf-8') as csv: + rows = csv.readlines() + for row in rows: + rowvals = re.split(';|,|\n',row.replace(' ','')) + if len(rowvals) == 2: + dic[rowvals[0]] = rowvals[1] + else: + continue + return dic +def orderresponses(correctfile,incorrectfile): + responses = {} + responses["Correct"] = [] + with open(correctfile, 'r', encoding='utf-8') as responsefile: + lineresponses = responsefile.readlines() + for response in lineresponses: + if len(response) > 1: + responses["Correct"].append(response) + responses["Incorrect"] = [] + with open(incorrectfile, 'r', encoding='utf-8') as responsefile: + lineresponses = responsefile.readlines() + for response in lineresponses: + if len(response) > 1: + responses["Incorrect"].append(response) + return responses +def riddle (dictname,orderedresponses): + words = list(dictname.values()) + hints = list(dictname.keys()) + currenthint = random.choice(hints) + while True: + option = input(currenthint+' ') + if dictname[currenthint] == option: + print(random.choice(orderedresponses["Correct"])) + break + else: + print(random.choice(orderedresponses["Incorrect"])) +riddle(getdict("in.csv"),orderresponses("correct.txt","incorrect.txt")) +import re +def cleanword(word): + word=word.lower() + + falsechars = [] + for i in range(len(word)): + if re.search("[a-яё\-]",word[i]) == None: + falsechars.append(word[i]) + for char in falsechars: + word = word.replace(char,"") + if word == '-': + word = '' + return word +def 
main(): + with open ('profession.txt' , 'r', encoding='utf-8') as f: + sentences = re.split(r'([.?!]|(\.\.\.)) ',re.sub(r'[\t\n]',' ',f.read())) + for sentence in sentences: + if not sentence: + continue + lexemes = [cleanword(word) for word in sentence.split() if cleanword(word)] + dic = {lexeme: 0 for lexeme in lexemes} + for lexeme in lexemes: + dic[lexeme]=dic[lexeme]+1 + outdic = {key: dic[key] for key in sorted(list(dic.keys())) if dic[key] > 1} + template = '{} {:^10}' + for wrd in sorted(list(outdic.keys())): + print(template.format(wrd,outdic[wrd])) +main() +import re +def main(): + with open("birds.html",'r',encoding='utf-8') as file: + f = file.read() + f = re.sub(r'([^а-яё])птиц((у|ы|а(м?и?|х?)|е)?[^а-яё])',r'\1рыб\2',f) + f = re.sub(r'([^а-яё])птицей([^а-яё])',r'\1рыбой\2',f) + f = re.sub(r'([^а-яё])Птиц((у|ы|а(м?и?|х?)|е)?[^а-яё])',r'\1Рыб\2',f) + f = re.sub(r'([^а-яё])Птицей([^а-яё])',r'\1Рыбой\2',f) + with open("fishes.html",'w',encoding='utf-8') as outfile: + outfile.write(f) +main() +import os +import re +def countwords(filepath): + W=0 + with open (filepath,'r') as infile: + lines = infile.readlines() + for line in lines: + if line[:3]=="": + W+=1 + return W +def printwordscount(fileslist): + with open ('words_count.txt','w',encoding='utf-8') as outfile: + for i in range(len(fileslist)): + outfile.write(fileslist[i][1]+'\t'+str(countwords(fileslist[i][0]))+'\n') +def makefileslist(folderpath): + outlist=[] + filenames=os.listdir(folderpath) + for filename in filenames: + outlist.append([folderpath+os.sep+filename,filename]) + return outlist +def getmeta(filepath): + outstr="" + with open (filepath,'r') as infile: + lines = infile.readlines() + for line in lines: + if line[:5]=="',line): + sentencesarr.append([]) + if line[:3]=="": + sentencesarr[len(sentencesarr)-1].append(line) + for sentence in sentencesarr: + sentencestr="" + for line in sentence: + sentencestr+=re.sub(r'<.?w>||\n|<.p>||| ','',line) + sentencestr+=' ' + buffarr=[] + 
prevA=False + for line in sentence: + if re.search('A=',line) and re.search('gen',line) and not prevA: + buffarr.append(re.search('(.*?)',line).group(1)) + prevA=True + elif re.search('S,',line) and re.search('gen',line) and prevA: + buffarr.append(re.search('(.*?)',line).group(1)) + bigramstxt.write(buffarr[0]+' '+buffarr[1]+'\t'+sentencestr+'\n') + buffarr.pop() + buffarr.pop() + prevA=False + else: + prevA=False + if len(buffarr)>0: + buffarr.pop() +def main(): + flist=makefileslist('news') + printwordscount(flist) + makecsv(flist) + makebigrams(flist) +main() +import random +NEXTGENDER="" +NEXTCASE="nom" +NEEDSVERB=False +ISANIM=False +def sylls(word): + global NEXTCASE + global NEXTGENDER + global NEEDSVERB + global ISANIM + outs=0 + word=word.lower() + for i in range(len(word)): + if word[i]=='а' or word[i]=='е' or word[i]=='ё' or word[i]=='и' or word[i]=='о' or word[i]=='у' or word[i]=='ы' or word[i]=='э' or word[i]=='ю' or word[i]=='я': + outs = outs + 1 + return outs +def verb(category,min_syllables,max_syllables): + global NEXTCASE + global NEXTGENDER + global NEEDSVERB + global ISANIM + if category == "past_m": + past_m = [] + f = open("v_past_m.txt", 'r', encoding='utf-8') + for word in f: + past_m.append(word) + NEXTGENDER="m" + pick = random.choice(past_m)[:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = random.choice(past_m)[:-1] + return pick + if category == "past_n": + past_n = [] + f = open("v_past_n.txt", 'r', encoding='utf-8') + for word in f: + past_n.append(word) + NEXTGENDER="n" + pick = random.choice(past_n)[:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = random.choice(past_n)[:-1] + return pick + if category == "past_f": + past_f = [] + f = open("v_past_f.txt", 'r', encoding='utf-8') + for word in f: + past_f.append(word) + NEXTGENDER="f" + pick = random.choice(past_f)[:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = 
random.choice(past_f)[:-1] + return pick + else: + present = [] + f = open("v_praes_tran.txt", 'r', encoding='utf-8') + for word in f: + present.append(word) + pick = random.choice(present)[:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = random.choice(present)[:-1] + NEXTCASE="acc" + return pick +def bigram(gender,min_syllables,max_syllables): + global NEXTCASE + global NEXTGENDER + global NEEDSVERB + global ISANIM + casearr=[] + bigramarr=[] + if gender == "m": + f = open("v_abl_m.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("abl") + f = open("v_acc_m.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("acc") + f = open("v_dat_m.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("dat") + f = open("v_gen_m.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("gen") + elif gender == "f": + f = open("v_abl_f.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("abl") + f = open("v_acc_f.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("acc") + f = open("v_dat_f.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("dat") + f = open("v_gen_f.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("gen") + else: + f = open("v_abl_n.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("abl") + f = open("v_acc_n.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("acc") + f = open("v_dat_n.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("dat") + f = open("v_gen_n.txt", 'r', encoding='utf-8') + for word in f: + bigramarr.append(word) + casearr.append("gen") + pick = random.randint(0,len(bigramarr)-1) + res = bigramarr[pick][:-1] + while 
def _read_noun_file(path):
    """Return the lines of the UTF-8 text file *path* without their newlines."""
    # rstrip instead of [:-1]: a final line without a trailing newline
    # must not lose its last letter.
    with open(path, 'r', encoding='utf-8') as f:
        return [line.rstrip('\n') for line in f]


def _choose_noun_index(words, min_syllables, max_syllables):
    """Return a random index into *words* whose entry fits the syllable range.

    Raises ValueError when no entry has between *min_syllables* and
    *max_syllables* syllables (the retry loop used previously never
    terminated in that situation).
    """
    fitting = [i for i, w in enumerate(words)
               if min_syllables <= sylls(w) <= max_syllables]
    if not fitting:
        raise ValueError(
            "no noun with %s to %s syllables" % (min_syllables, max_syllables))
    return random.choice(fitting)


def noun(case, gender, min_syllables, max_syllables):
    """Return a random noun form for *case* with a syllable count in range.

    Word lists are read from ``nouns_<gender>_<case>.txt`` (one form per
    line).  Any *case* other than "nom", "gen", "dat", "ins" or "abl" is
    treated as accusative, which uses ``nouns_f_acc.txt``,
    ``nouns_m_acc_anim.txt``, ``nouns_m_acc_inan.txt`` and
    ``nouns_n_acc.txt``.

    Parameters:
        case: case tag selecting the word lists.
        gender: "f", "m" or "n" restricts the choice to that gender; any
            other value allows every gender.  For accusative masculine
            the global ISANIM selects the animate or inanimate list.
        min_syllables, max_syllables: inclusive bounds checked with sylls().

    Returns:
        The chosen word without its trailing newline.

    Side effects:
        For the nominative case with an unrestricted gender, the global
        NEXTGENDER is set to the gender of the chosen noun.

    Raises:
        ValueError: if no word of the requested kind fits the bounds.
    """
    global NEXTGENDER
    if case in ("nom", "gen", "dat", "ins", "abl"):
        sources = [(g, "nouns_%s_%s.txt" % (g, case)) for g in ("f", "m", "n")]
    else:
        sources = [
            ("f", "nouns_f_acc.txt"),
            ("m", "nouns_m_acc_anim.txt"),
            ("m", "nouns_m_acc_inan.txt"),
            ("n", "nouns_n_acc.txt"),
        ]
        if gender == "m":
            # Masculine accusative depends on animacy.
            wanted = "nouns_m_acc_anim.txt" if ISANIM else "nouns_m_acc_inan.txt"
            sources = [("m", wanted)]

    any_gender = gender not in ("f", "m", "n")
    words = []
    genders = []
    for tag, path in sources:
        if any_gender or tag == gender:
            loaded = _read_noun_file(path)
            words.extend(loaded)
            genders.extend([tag] * len(loaded))

    # With an unrestricted gender the whole list is eligible.  The
    # nominative branch used to start at the first neuter word, so only
    # neuter nouns could ever be returned.
    index = _choose_noun_index(words, min_syllables, max_syllables)
    if case == "nom" and any_gender:
        NEXTGENDER = genders[index]
    return words[index]
random.randint(0,len(datarr)-1) + pick = datarr[s][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + s = random.randint(0,len(datarr)-1) + pick = datarr[s][:-1] + NEXTGENDER=datgender[s] + return pick + if case == "ins": + insarr = [] + insgender = [] + f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') + for word in f: + insarr.append(word) + insgender.append("f") + f = open("adj_mn_ins.txt", 'r', encoding='utf-8') + for word in f: + insarr.append(word) + insgender.append("m") + f = open("adj_mn_ins.txt", 'r', encoding='utf-8') + for word in f: + insarr.append(word) + insgender.append("n") + s = random.randint(0,len(insarr)-1) + pick = insarr[s][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + s = random.randint(0,len(insarr)-1) + pick = insarr[s][:-1] + NEXTGENDER=insgender[s] + return pick + if case == "abl": + ablarr = [] + ablgender = [] + f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') + for word in f: + ablarr.append(word) + ablgender.append("f") + f = open("adj_mn_abl.txt", 'r', encoding='utf-8') + for word in f: + ablarr.append(word) + ablgender.append("m") + f = open("adj_mn_abl.txt", 'r', encoding='utf-8') + for word in f: + ablarr.append(word) + ablgender.append("n") + s = random.randint(0,len(ablarr)-1) + pick = ablarr[s][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + s = random.randint(0,len(ablarr)-1) + pick = ablarr[s][:-1] + NEXTGENDER=ablgender[s] + return pick + else: + accarr = [] + accgender = [] + f = open("adj_f_acc.txt", 'r', encoding='utf-8') + for word in f: + accarr.append(word) + accgender.append("f") + if ISANIM: + f = open("adj_m_acc_anim.txt", 'r', encoding='utf-8') + for word in f: + accarr.append(word) + accgender.append("m") + else: + f = open("adj_m_acc_inan.txt", 'r', encoding='utf-8') + for word in f: + accarr.append(word) + accgender.append("m") + f = open("adj_n_nom_acc.txt", 'r', encoding='utf-8') + for word in f: + 
def punctuation(isend):
    """Return a random punctuation mark.

    The mark is drawn from ".", "?", "!", "..." and ",".  A comma is only
    acceptable when *isend* equals "nonend"; for any other value the draw
    is repeated until a different mark comes up.
    """
    comma_allowed = isend == "nonend"
    candidates = [".", "?", "!", "...", ","]
    while True:
        mark = random.choice(candidates)
        if comma_allowed or mark != ",":
            return mark
def make_verse7():
    """Return one seven-syllable line of the poem.

    When the global NEEDSVERB flag is set, the line is produced by
    verbverse7().  Otherwise one of verse7a(), verse7b() or verse7c() is
    chosen at random.
    """
    if NEEDSVERB:
        # The result used to be discarded, so the verb line was generated
        # and thrown away while an unrelated verse was returned.
        return verbverse7()
    builder = random.choice((verse7a, verse7b, verse7c))
    return builder()
def sylls(word):
    """Return the number of Russian vowel letters in *word*.

    The word is lower-cased first, so upper-case vowels are counted too.
    The count is used by the generator as the syllable count of a word.
    """
    vowels = 'аеёиоуыэюя'
    return sum(1 for letter in word.lower() if letter in vowels)
def bigram(gender, min_syllables, max_syllables):
    """Return a random entry from the ``v_<case>_<gender>.txt`` lists.

    The entries of ``v_abl_*``, ``v_acc_*``, ``v_dat_*`` and ``v_gen_*``
    are pooled; *gender* "m" or "f" selects the matching files and any
    other value selects the ``_n`` files.

    Parameters:
        gender: "m", "f", or anything else for neuter.
        min_syllables, max_syllables: inclusive bounds checked with sylls().

    Returns:
        The chosen entry without its trailing newline.

    Side effects:
        The global NEXTCASE is set to the case tag ("abl", "acc", "dat"
        or "gen") of the file the entry came from.

    Raises:
        ValueError: if no entry fits the bounds (previously the retry
            loop never terminated in that situation).
    """
    global NEXTCASE
    suffix = gender if gender in ("m", "f") else "n"
    entries = []
    cases = []
    for case in ("abl", "acc", "dat", "gen"):
        # "with" closes each file; the handles used to be leaked.
        with open("v_%s_%s.txt" % (case, suffix), 'r', encoding='utf-8') as f:
            for line in f:
                # rstrip keeps the last letter of a final line that has
                # no trailing newline, unlike the former [:-1].
                entries.append(line.rstrip('\n'))
                cases.append(case)

    fitting = [i for i, entry in enumerate(entries)
               if min_syllables <= sylls(entry) <= max_syllables]
    if not fitting:
        raise ValueError(
            "no entry with %s to %s syllables" % (min_syllables, max_syllables))
    pick = random.choice(fitting)
    NEXTCASE = cases[pick]
    return entries[pick]
case == "nom": + nomnouns = [] + nomgenders = [] + c=0 + f = open("nouns_f_nom.txt", 'r', encoding='utf-8') + for word in f: + nomnouns.append(word) + nomgenders.append("f") + if gender == "f": + pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] + return pick + c = len(nomnouns) + f = open("nouns_m_nom.txt", 'r', encoding='utf-8') + for word in f: + nomnouns.append(word) + nomgenders.append("m") + if gender == "m": + pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] + return pick + c = len(nomnouns) + f = open("nouns_n_nom.txt", 'r', encoding='utf-8') + for word in f: + nomnouns.append(word) + nomgenders.append("n") + if gender == "n": + pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] + return pick + else: + s = random.randint(c,len(nomnouns)-1) + pick = nomnouns[s][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + s = random.randint(c,len(nomnouns)-1) + pick = nomnouns[s][:-1] + NEXTGENDER = nomgenders[s] + return pick + if case == "gen": + gennouns = [] + c=0 + f = open("nouns_f_gen.txt", 'r', encoding='utf-8') + for word in f: + gennouns.append(word) + if gender == "f": + pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] + return pick + c = len(gennouns) + f = open("nouns_m_gen.txt", 'r', encoding='utf-8') + for word in f: + gennouns.append(word) + if gender == "m": + pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = 
gennouns[random.randint(c,len(gennouns)-1)][:-1] + return pick + c = len(gennouns) + f = open("nouns_n_gen.txt", 'r', encoding='utf-8') + for word in f: + gennouns.append(word) + if gender == "n": + pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] + return pick + else: + pick = gennouns[random.randint(0,len(gennouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = gennouns[random.randint(0,len(gennouns)-1)][:-1] + return pick + if case == "dat": + datnouns = [] + c=0 + f = open("nouns_f_dat.txt", 'r', encoding='utf-8') + for word in f: + datnouns.append(word) + if gender == "f": + pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] + return pick + c = len(datnouns) + f = open("nouns_m_dat.txt", 'r', encoding='utf-8') + for word in f: + datnouns.append(word) + if gender == "m": + pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] + return pick + c = len(datnouns) + f = open("nouns_n_dat.txt", 'r', encoding='utf-8') + for word in f: + datnouns.append(word) + if gender == "n": + pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] + return pick + else: + pick = datnouns[random.randint(0,len(datnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = datnouns[random.randint(0,len(datnouns)-1)][:-1] + return pick + if case == "ins": + insnouns = [] + c=0 + f = open("nouns_f_ins.txt", 'r', encoding='utf-8') + for word in f: + insnouns.append(word) + if gender == "f": + pick = 
insnouns[random.randint(c,len(insnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] + return pick + c = len(insnouns) + f = open("nouns_m_ins.txt", 'r', encoding='utf-8') + for word in f: + insnouns.append(word) + if gender == "m": + pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] + return pick + c = len(insnouns) + f = open("nouns_n_ins.txt", 'r', encoding='utf-8') + for word in f: + insnouns.append(word) + if gender == "n": + pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] + return pick + else: + pick = insnouns[random.randint(0,len(insnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = insnouns[random.randint(0,len(insnouns)-1)][:-1] + return pick + if case == "abl": + ablnouns = [] + c=0 + f = open("nouns_f_abl.txt", 'r', encoding='utf-8') + for word in f: + ablnouns.append(word) + if gender == "f": + pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] + return pick + c = len(ablnouns) + f = open("nouns_m_abl.txt", 'r', encoding='utf-8') + for word in f: + ablnouns.append(word) + if gender == "m": + pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > max_syllables: + pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] + return pick + c = len(ablnouns) + f = open("nouns_n_abl.txt", 'r', encoding='utf-8') + for word in f: + ablnouns.append(word) + if gender == "n": + pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] + while sylls(pick) < min_syllables or sylls(pick) > 
def adj(case, min_syllables, max_syllables):
    """Return a random adjective form for *case* with a fitting syllable count.

    Word lists (one form per line):
      * "nom": adj_f_nom.txt, adj_m_nom.txt, adj_n_nom_acc.txt
      * "gen", "dat", "ins", "abl": adj_f_gen_dat_ins_abl.txt for the
        feminine and adj_mn_<case>.txt for both masculine and neuter
        (the file is loaded once per gender, as before, so each of its
        forms can be drawn as either gender)
      * anything else (accusative): adj_f_acc.txt, adj_m_acc_anim.txt or
        adj_m_acc_inan.txt depending on the global ISANIM, and
        adj_n_nom_acc.txt

    Parameters:
        case: case tag selecting the word lists.
        min_syllables, max_syllables: inclusive bounds checked with sylls().

    Returns:
        The chosen form without its trailing newline.

    Side effects:
        The global NEXTGENDER is set to the gender ("f", "m" or "n")
        attached to the chosen form.

    Raises:
        ValueError: if no form fits the bounds (previously the retry loop
            never terminated in that situation).
    """
    global NEXTGENDER
    if case == "nom":
        sources = [("f", "adj_f_nom.txt"),
                   ("m", "adj_m_nom.txt"),
                   ("n", "adj_n_nom_acc.txt")]
    elif case in ("gen", "dat", "ins", "abl"):
        shared = "adj_mn_%s.txt" % case
        sources = [("f", "adj_f_gen_dat_ins_abl.txt"),
                   ("m", shared),
                   ("n", shared)]
    else:
        masculine = "adj_m_acc_anim.txt" if ISANIM else "adj_m_acc_inan.txt"
        sources = [("f", "adj_f_acc.txt"),
                   ("m", masculine),
                   ("n", "adj_n_nom_acc.txt")]

    forms = []
    genders = []
    for tag, path in sources:
        # "with" closes each file; the handles used to be leaked.
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                # rstrip keeps the last letter of a final line that has
                # no trailing newline, unlike the former [:-1].
                forms.append(line.rstrip('\n'))
                genders.append(tag)

    fitting = [i for i, form in enumerate(forms)
               if min_syllables <= sylls(form) <= max_syllables]
    if not fitting:
        raise ValueError(
            "no adjective with %s to %s syllables"
            % (min_syllables, max_syllables))
    pick = random.choice(fitting)
    NEXTGENDER = genders[pick]
    return forms[pick]
def verse7b():
    """Build a line of the form "adjective noun past-tense-verb" + mark.

    A nominative adjective of 2-3 syllables is drawn first; the noun is
    requested with exactly the syllables left to reach five, using the
    gender stored in NEXTGENDER, and a two-syllable past-tense verb of
    that gender follows.  The line ends with a non-final punctuation mark
    and is returned capitalized.
    """
    global NEXTGENDER
    adjective = adj("nom", 2, 3)
    remaining = 5 - sylls(adjective)
    # NEXTGENDER is read only after adj() has run, exactly as before.
    subject = noun("nom", NEXTGENDER, remaining, remaining)
    predicate = verb("past_" + NEXTGENDER, 2, 2)
    line = adjective + ' ' + subject + ' ' + predicate + punctuation("nonend")
    return line.capitalize()
def getcode(filename):
    """Return the three-character ISO 639-3 code linked from an HTML file.

    The file is read as UTF-8.  The code is the three characters that
    follow ``id=`` in the first
    ``http://www-01.sil.org/iso639-3/documentation.asp?id=...`` link that
    appears after the text "ISO 639-3".

    Raises:
        ValueError: if the file contains no such link (previously this
            surfaced as an AttributeError on ``None``).
    """
    with open(filename, 'r', encoding='utf-8') as f:
        page = f.read()
    # Raw strings: the former plain literal relied on invalid escape
    # sequences such as "\/" and "\.".  [\s\S] spans newlines like the
    # old (.|\n) group did.
    match = re.search(
        r'ISO 639-3[\s\S]*?'
        r'http://www-01\.sil\.org/iso639-3/documentation\.asp\?id=(...)',
        page)
    if match is None:
        raise ValueError('no ISO 639-3 code found in ' + repr(filename))
    return match.group(1)
def opentext():
    """Read 'test2.xml' and return its pieces split on '>'.

    The characters . , ? * ( ) « » are stripped from both ends of every
    piece; nothing else is changed.
    """
    with open('test2.xml', 'r', encoding='utf-8') as source:
        pieces = source.read().split('>')
    return [piece.strip('.,?*()«»') for piece in pieces]
word not in ar1: + ar1.append(word) + print (ar1) +reg() + + + +word = input() +a = word[0::2] +for a in a[0:]: + if (a.find("п") != -1): + print ("п") + if (a.find("о") != -1): + print ("о") + if (a.find("е") != -1): + print ("е") +word = input() +for i in range(len(word)): + print (word[:i+1]) + +import re +a = input ('File name: ') +with open (a, 'r', encoding='utf-8') as f: + b = re.findall('>UTC((?:\+|-)(?:[0-9]|10|11|12|13|14)(?::(?:30|45))?)', f.read()) + print (' '.join(b)) +with open ('text.txt', 'w', encoding='utf-8') as f: + for i in b: + f.write(i) + + + + + +dots = [] +dictionary = {'белый':'снег','мобильный':'телефон','контрольная':'работа','диковинный':'зверь','фотографическая':'память','резиновый':'сапог','дремучий':'лес'} +for word in dictionary: + l = len(dictionary[word]) + while l!=0: + l=l-1 + dots.append(".") + d = ''.join(dots) + print('\n',word, d) + dots.clear() + answ = input() + if answ == dictionary[word]: + print ('Right!') + else: + print ('Try again :(') +list = [] +with open('prob.txt', 'r', encoding='utf-8') as f: + for line in f.readlines(): + lengh = len(line) + list.append(lengh) +print(list) +maxim = list[0] +minim = list[0] +for num in list: + if num <= minim: + minim = num + if num > maxim: + maxim = num +print(minim) +print(maxim) +print(maxim/minim) + + + + +def opentext(): + print("Введите название файла: ") + name = input() + words = [] + with open (name, 'r', encoding = 'utf-8') as f: + text = f.read() + words = text.split() + for i in range(len(words)): + words[i] = words[i].strip('.,?*()«»') + return words +def nouns(): + text = [] + adj = [] + k = 0 + text = opentext() + for word in text: + l = len(word) + if word[l-4:] == 'ness': + adj.append(word) + return(adj) +def freq(): + maxim = 0 + adjectives = nouns() + for word in adjectives: + k = 0 + for word1 in adjectives: + if word1 == word: + k = k+1 + if k>maxim: + maxim = k + mostfreq = word + return (mostfreq) +def count(): + summ = [] + words = nouns() + for word in 
words: + s = 0 + for word1 in words: + if word == word1: + del word1 + s = s+1 + if s == 0: + summ.append(word) + return (len(summ)) +def function1(): + a = count() + w = freq() + print(a) + print(w) +a = int(input()) +b = int(input()) +c = int(input()) +if a * b == c: + print ('a умножить на b равно c') +else: + print ('a умножить на b НЕ равно c') +if a * c + b == 0: + print ('c является решением линейного уравнения ax + b = 0') +else: + print ('c НЕ является решением линейного уравнения ax + b = 0') + +import re +def opentext(): + words = [] + with open ('Викинги — Википедия.html', 'r', encoding = 'utf-8') as f: + text = f.read() + m = re.sub('викинг','бурундук',text) + n = re.sub('Викинг','Бурундук',m) + with open ('Викинги — Википедия.html', 'w', encoding = 'utf-8') as f: + f.write(n) +opentext() +import os +import re +def preprocessing (content): + notags = re.sub(r'\<[^>]*\>', '',content) + nons = re.sub (r'\n', '', notags) + return nons +def countsen (): + for root, dirs, files in os.walk(os.path.join(os.getcwd(), 'news')): + for f in files: + with open (os.path.join(root, f), 'r', encoding='Windows-1251')as n: + content = n.read() + content1 = preprocessing(content) + numsen = content1.split('.') + num = str(len(numsen)) + template = "{}\t{}\n" + with open('numsens.txt', 'a', encoding='utf-8') as k: + k.write(template.format(f, num)) +countsen() +def author(): + for root, dirs, files in os.walk(os.path.join(os.getcwd(), 'news')): + for f in files: + with open (os.path.join(root, f), 'r', encoding='Windows-1251')as n: + content = n.read() + content1 = preprocessing(content) + words = content1.split(' ') + author = words [0]+' '+words[1] + name = f + topic = re.search(r'', content) + with open('table.csv', 'a', encoding='Windows-1251') as k: + k.write(name+author) +author() +import random +def qws(): + with open('words1.txt','r', encoding = 'utf-8') as ws1: + wss1=[] + for line in ws1: + line = line.strip() + wss1.append(line) + return random.choice(wss1) 
+def wws(): + with open('words2.txt','r', encoding = 'utf-8') as ws2: + wss2=[] + for line in ws2: + line = line.strip() + wss2.append(line) + return random.choice(wss2) +def ews(): + with open('words3.txt','r', encoding = 'utf-8') as ws3: + wss3=[] + for line in ws3: + line = line.strip() + wss3.append(line) + return random.choice(wss3) +def verb(): + with open('words2v.txt','r', encoding = 'utf-8') as ws2v: + wss2v=[] + for line in ws2v: + line = line.strip() + wss2v.append(line) + return random.choice(wss2v) +def p51(): + pros=random.choice([1, 2, 3]) + if pros == 1: + return qws()+' '+wws()+' '+verb() + elif pros == 2: + return verb()+' '+wws()+' '+qws() + else: + return wws()+' '+qws()+' '+verb() +def p52(): + pr=random.choice([1, 2]) + if pr == 1: + return ews()+' '+verb() + else: + return verb()+' '+ews() +def p5(): + prost = random.choice([1, 2]) + if prost == 1: + return p51() + else: + return p52() +def very(): + with open('ochen.txt','r', encoding = 'utf-8') as och: + oche=[] + for line in och: + line = line.strip() + oche.append(line) + return random.choice(oche) +def red(): + with open('adjn.txt','r', encoding = 'utf-8') as adj: + adjs=[] + for line in adj: + line = line.strip() + adjs.append(line) + return random.choice(adjs) +def plat(): + with open('pla.txt','r', encoding = 'utf-8') as pla: + plas=[] + for line in pla: + line = line.strip() + plas.append(line) + return random.choice(plas) +def znak(): + zn = [".", "!", "..."] + return random.choice(zn) +def p7(): + return very()+' '+red()+' '+plat()+znak() +def maybe(): + with open('maybe.txt','r', encoding = 'utf-8') as may: + be=[] + for line in may: + line = line.strip() + be.append(line) + return random.choice(be) +def sun(): + with open('pla.txt','r', encoding = 'utf-8') as suns: + sunn=[] + for line in suns: + line = line.strip() + sunn.append(line) + return random.choice(sunn) +def fin(): + with open('fin.txt','r', encoding = 'utf-8') as vse: + vses=[] + for line in vse: + line = line.strip() 
+ vses.append(line) + return random.choice(vses) +def last(): + return maybe()+', '+sun()+' '+fin() +def poem(): + print (p5()) + print (p7()) + print (p5()) + print (p7()) + print (last()) +poem() +import re +from math import log +punct = '[.,!«»?&@"$\[\]\(\):;% +def preprocessing(text): + longwords=[] + text_wo_punct = re.sub(punct, '', text.lower()) + words = text_wo_punct.strip().split() + for word in words: + if len(word)>=4: + longwords.append(word) + return longwords +import os +anek = '' +teh = '' +izvest = '' +for root, dirs, files in os.walk('texts'): + for f in files: + if 'anekdots' in root: + num_anek = len(files) + anek += open(os.path.join(root, f), encoding = 'utf-8').read() + elif 'izvest' in root: + num_izvest = len(files) + izvest += open(os.path.join(root, f), encoding = 'utf-8').read() + elif 'teh_mol' in root: + num_teh = len(files) + teh += open(os.path.join(root, f), encoding = 'utf-8').read() +words_anek = preprocessing(anek) +words_teh = preprocessing(teh) +words_izvest = preprocessing(izvest) +words = words_anek + words_teh + words_izvest +def freq_dict(arr): + dic = {} + for element in arr: + if element in dic: + dic[element] += 1 + else: + dic[element] = 1 + return dic +corpus_freq = freq_dict(words) +anek_freq = freq_dict(words_anek) +izvest_freq = freq_dict(words_izvest) +teh_freq = freq_dict(words_teh) +def first (dic): + i = 0 + for word in sorted(dic, key = lambda m: -dic[m]): + if i > 100: + break + i += 1 + return dic +corpus_freq_first= first(corpus_freq) +def firstcat (dic): + newdic = {} + if word in words: + newdic [word] = dic [word] + return newdic +anek_freq_first = firstcat(anek_freq) +izvest_freq_first = firstcat(izvest_freq) +teh_freq_first = firstcat(teh_freq) +def pmi_for_cats(x, y): + words_ex = [] + freq_ex = {} + if y == 'anek': + dic = anek_freq_first + arr = words_anek + num = num_anek + words_ex = words - words_anek + freq_ex = freqdict (words_ex) + elif y == 'teh': + dic = teh_freq_first + arr = words_teh + num 
= num_teh + words_ex = words - words_teh + freq_ex = freqdict (words_ex) + elif y == 'izvest': + dic = izvest_freq_first + arr = words_izvest + num = num_izvest + words_ex = words - words_izvest + freq_ex = freqdict (words_ex) + p_xy = dic[x]/len(arr) + p_x, p_y = freq_ex[x]/len(words_ex), num/(num_izvest + num_teh + num_anek) + pmi = log(p_xy/(p_x * p_y)) + return pmi +cat_pmi = {} +i = 0 +for word in corpus_freq: + if i > 100: + break + try: + pmi_anek = pmi_for_cats(word, 'anek') + except KeyError: + pmi_anek = 0 + try: + pmi_teh = pmi_for_cats(word, 'teh') + except KeyError: + pmi_teh = 0 + try: + pmi_izvest = pmi_for_cats(word, 'izvest') + except KeyError: + pmi_izvest = 0 + max_pmi = max(pmi_anek, pmi_teh, pmi_izvest) + if max_pmi == 0: + continue + if max_pmi == pmi_anek: + cat = 'anek' + elif max_pmi == pmi_teh: + cat = 'teh' + elif max_pmi == pmi_izvest: + cat = 'izvest' + print(word, cat) + i += 1 +import re +from math import log +punct = '[.,!«»?&@"$\[\]\(\):;% +def preprocessing(text): + longwords=[] + text_wo_punct = re.sub(punct, '', text.lower()) + words = text_wo_punct.strip().split() + for word in words: + if len(word)>=4: + longwords.append(word) + return longwords +import os +anek = '' +teh = '' +izvest = '' +for root, dirs, files in os.walk('texts'): + for f in files: + if 'anekdots' in root: + num_anek = len(files) + anek += open(os.path.join(root, f), encoding='utf-8').read() + elif 'izvest' in root: + num_izvest = len(files) + izvest += open(os.path.join(root, f), encoding='utf-8').read() + elif 'teh_mol' in root: + num_teh = len(files) + teh += open(os.path.join(root, f), encoding='utf-8').read() +words_anek = preprocessing(anek) +words_teh = preprocessing(teh) +words_izvest = preprocessing(izvest) +words = words_anek + words_teh + words_izvest +def bigram_dict(words): + bigrams = [] + for ind in range(1, len(words) - 1): + bigrams.append(' '.join([words[ind - 1], words[ind]])) + bigram_freq = {} + for b in bigrams: + if b in bigram_freq: + 
bigram_freq[b] += 1 + else: + bigram_freq[b] = 1 + return bigram_freq +corpus_bfreq = bigram_dict(words) +anek_bfreq = bigram_dict(words_anek) +izvest_bfreq = bigram_dict(words_izvest) +teh_bfreq = bigram_dict(words_teh) +def pmi_for_cats(x, y): + if y == 'anek': + dic = anek_bfreq + num = num_anek + elif y == 'teh': + dic = teh_bfreq + num = num_teh + elif y == 'izvest': + dic = izvest_bfreq + num = num_izvest + p_xy = dic[x]/len(dic) + p_x, p_y = corpus_bfreq[x]/len(corpus_bfreq), num/(num_izvest + num_teh + num_anek) + pmi = log(p_xy/(p_x * p_y)) + return pmi +cat_pmi = {} +i = 0 +for bigram in corpus_bfreq: + if i > 100: + break + try: + pmi_anek = pmi_for_cats(bigram, 'anek') + except KeyError: + pmi_anek = 0 + try: + pmi_teh = pmi_for_cats(bigram, 'teh') + except KeyError: + pmi_teh = 0 + try: + pmi_izvest = pmi_for_cats(bigram, 'izvest') + except KeyError: + pmi_izvest = 0 + max_pmi = max(pmi_anek, pmi_teh, pmi_izvest) + if max_pmi == 0: + continue + if max_pmi == pmi_anek: + cat = 'anek' + elif max_pmi == pmi_teh: + cat = 'teh' + elif max_pmi == pmi_izvest: + cat = 'izvest' + print(bigram, cat, pmi_for_cats(bigram, cat)) + i += 1 +a=[] +for i in range (8): + a.append(input()) +b=[] +for i in range (0,7,2): + b.append(a[i]) + b.append(a[i+1]) + print (''.join(b)) + b=[] +import re +regsit=r"\bси(жу|ди((шь|м|те?)?)|е(л[и,а,о]?|ть|в(ш(и(й|е|х|ми?)?))|ая|е(й|е|му?|го)))?|я(т|щ(и(й|ми?|х|е)|е(е|й|го|му?)|ая|ую))\b" +def tekst(): + with open ('new.txt', 'r', encoding='utf-8') as f: + text=f.readlines() + found=[] + for line in text: + words=line.split() + for word in words: + wor=word.lower() + wor=wor.strip(".,?!:;-") + if re.search (regsit, wor)!=None and found.count(wor)==0: + found.append(wor) + print(wor) +tekst() +print ('Введите число') +n=int(input()) +k=1 +while k*2<=n: + k=k*2 + print (k) + continue +with open('freq.txt', 'r', encoding = 'utf-8') as f: + print ('Задание 1') + for line in f: + form = [] + form = line.split(' | ') + if form[1] =='союз': + 
print (' | '.join(form)) +with open('freq.txt', 'r', encoding = 'utf-8') as f: + sum=0 + fem = [] + morp = [] + print ('Задание 2') + for line in f: + form = [] + form = line.split(' | ') + morp = form[1].split(' ') + if len(morp)>2: + if morp[2]=='ед' and morp[3]=='жен': + fem.append(form[0]) + sum += float(form[2]) + print (', '.join(fem)) + print('Сумма ipm:', sum) +word = input ('Введите слово: ') +print (word) +while len(word) > 1: + word=word [1:len(word)-1] + print (word) +import re +reg=r'(\n\n

(.*?)(.*?)

' + m = re.search(links,content) + if m != None: + return m.group(3) +def result(): + with open ('Результат.txt', 'w', encoding = 'utf - 8')as file: + result = open_s() + return file.write('Преподаватели:'+ result) +open_s() +result() +import re +def open_text(): + with open('Leskov.txt', 'r', encoding = 'utf - 8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for i, w in enumerate(arr): + arr[i] = arr[i].strip(',.”"?!-:;') + return arr +def words(): + arr = open_text() + return len(arr) +print(words()) +def frequency(): + arr = open_text() + d = {} + for i in range(len(arr)): + if arr[i] not in d: + d[arr[i]] = 1 + else: + d[arr[i]] +=1 + return d +def result(): + with open ('Результат.csv', 'w', encoding = 'utf - 8')as file: + d = frequency() + for key in sorted(d): + file.write(key + ',' + str(d[key])+ '\n') + return +result() +def phrase(): + with open('Leskov.txt', 'r', encoding = 'utf - 8') as f: + text = f.read() + with open ('Результат.txt', 'w', encoding = 'utf - 8')as file: + reg = re.findall(r'\b\w*аго \w*(?:и|а|ы)',text) + for reg + return reg +phrase() +def puzzle(): + phrase = {'незванный':'гость','розовый':'слон','вишнёвый':'сад', 'сиреневый':'туман', 'кленовый':'лист'} + for key in phrase: + for i in range(len(key)): + print(key + '...') + w = input('Я загадал слово ') + if w == phrase[key]: + return print('Ты выиграл') + else: + print ('Ты проиграл') + return +puzzle() +def open_text(): + with open('Книга1.csv', 'r', encoding = 'utf - 8') as f: + line= f.readline() + arr = line.split(';') + for i, w in enumerate(arr): + arr[i] = arr[i].strip('\n') + line = f.readline() + arr1 = line.split(';') + phrase = dict(zip(arr,arr1)) + return phrase +def puzzle(): + phrase = open_text() + for key in phrase: + for i in range(len(key)): + print(key + '...') + w = input('Я загадал слово ') + if w == phrase[key]: + return print('Ты выиграл') + else: + print ('Ты проиграл') + return +puzzle() +import re +def open_s(): + with open 
('Лингвистика — Википедия.html','r', encoding = 'utf - 8')as f: + text = f.read() + return text +def lang_meat(): + m = open_s() + step = re.sub(r'\bязык(а(ми?|х)?|у|о(м|в)|е|и)?\b',r'\bшашлык\1',m, flags = re.DOTALL) + step2 = re.sub(r'\bЯзык(а(ми?|х)?|у|о(м|в)|е|и)?\b',r'\bШашлык\1', step, flags = re.DOTALL) + return step2 +def result(): + with open ('Результат.txt', 'w', encoding = 'utf - 8')as file: + result = lang_meat() + return file.write(result) +result() +first = 0 +second = 0 +f = open( "Капибара.txt", "r", encoding = "utf-8") +for line in f: + arr = line.split() + for i in arr: + if len(i) == 3 and i[len(i)-1] != ',' and i[len(i)-1] != '.' and i[len(i)-1] != ':' and i[len(i)-1] != ';' and i[len(i)-1] != '!' and i[len(i)-1] != '?': + first += 1 + if len(i) == 4 and i[len(i)-1] == ',' for i[len(i)-1] == '.' or i[len(i)-1] == ':' or i[len(i)-1] == ';' or i[len(i)-1] == '!' or i[len(i)-1] == '?': + first += 1 + if len(i) == 1 and i != '―': + second += 1 + if len(i) == 2 and i[len(i)-1] == ',' or i[len(i)-1] == '.' or i[len(i)-1] == ':' or i[len(i)-1] == ';' or i[len(i)-1] == '!' 
or i[len(i)-1] == '?': + second +=1 +if second == 0: + print( 'Слов длины один нет') +else: + num = first/second +print(num) +f.close() +import re +import os +def folder(): + arr = [f for f in os.listdir('.')if re.search(r'[а-яёЁА-Я]+',f)if os.path.isdir(f)] + print(len(arr)) + return arr +folder() +def print_result(): + result = [] + for f in os.listdir('.'): + if os.path.isfile(f): + f = f[:f.rfind('.')] + if f not in result: + result.append(f) + else: + if f not in result: + result.append(f) + return ' '.join([str(i) for i in result]) +print(print_result()) +import os +import re +def text(): + for root, dirs, files in os.walk('.'): + for f in files: + if f.endswith('.xhtml'): + with open( f, 'r') as text: + text = text.read() + reg = re.findall(r'', text) + with open('Exam.txt', 'w', encoding = 'utf-8') as f2: + f2.write(f +'\t'+str(len(reg))+ '\n') + return +text() +def table(): + for f in os.listdir('.'): + with open( f, 'r') as text: + text = text.read() + reg1 = re.findall(r' ([А-Яа-яёЁ]*)\.', text) + reg2 = re.findall(r'([0-9]*)', text) + for i in reg1 and j in reg2: + with open ('Результат.csv', 'w', encoding = 'utf - 8')as file: + file.write( f + ',' + i + ',' + j+ ','+ '\n') + return +table() +n = 0 +f = open( "Цитаты.txt", "r", encoding = "utf-8") +for line in f: + arr = line.split ('—') + arr2 = arr[0].split() + if len(arr2) < 10: + print (arr[0]) +arr3 = line.split() +for i in arr3: + if i == 'разум': + n += 1 +print (n) +f.close() +import re +def open_s(): + with open ('Капибара — Википедия.html','r', encoding = 'utf - 8')as f: + text = f.read() + reg = r'(.*?)' + m = re.findall(reg,text) + return m +print (open_s()) +for link in links [:10]: + print(link[0] +for link in links[:10]: + print(link[2],'-->', link[1] +import os +def delete(dirname): + for root, dirs, files in os.walk(dirname): + for f in files: + os.remove(os.path.join(root, f)) + for d in dirs: + delete(os.path.join(root,d)) + os.rmdir(root) +delete('кот') +def print_tree(dirname, space 
= 0): + for root, dirs, files in os.walk(dirname): + print(''*root) + for i in files: + print(''*space,' **()'.format(i) + space += 2 +import os +def task_0(): + print(os.listdir('.')) +task_0() +def task_1(): + sent = input('Введите предложение:') + arr = sent.split() + path = '\\'.join([str(i) for i in arr]) + os.makedirs(path) +task_1() +def task_2(): + n = int(input()) + arr = [] + for i in range(n): + arr.append(i) + path = '\\'.join([str(i) for i in arr]) + os.makedirs(path) +task_2() +def count_tf(word, text): + return text.count(word) / len(text) +def count_df(word, texts): + n = [1 for text in texts if word in text] + return sum(n) +def count_idf(word, texts): + n = len(texts) / (1 + count_df(word, texts)) + return n +from math import log +def count_tfidf(word, text, texts): + tf = count_tf(word, text) + idf = count_idf(word, texts) + return log(tf, 10) * log(idf, 10) +import re +punct = '[.,!«»?&@"$\[\]\(\):;% +def preprocessing(text): + text_wo_punct = re.sub(punct, '', text.lower()) + word = text_wo_punct.strip().split() + words = [i for i in word if len(i)>4 and re.search(r'[1-9]+', i) is None] + return words +import os +texts_dic = {} +for root, dirs, files in os.walk('wikipedia'): + for f in files[:50]: + with open(os.path.join(root, f), 'r', encoding='utf-8') as t: + text = preprocessing(t.read()) + texts_dic[f.split('.')[0]] = text +texts = list(texts_dic.values()) +for text in texts_dic: + print("Top words in document {}".format(text)) + scores = {} + for word in texts_dic[text]: + scores[word] = count_tfidf(word, texts_dic[text], texts) + sorted_words = sorted(scores.items(), key=lambda x: x[1]) + for word, score in sorted_words[:5]: + print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5))) +def open_text(): + with open('text.txt', 'r', encoding = 'utf - 8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for i, w in enumerate(arr): + arr[i] = arr[i].strip(',.?!-') + return arr + +def first_letter(letter): + arr = 
open_text() + for i in arr: + if letter == i[0]: + print (i) + else: + pass + return i +letter = input('Введите букву') + + + +def c(): + cont = input('Введите страну ') + d = {'Россия': 'Москва','Германия' : 'Берлин','Италия':'Рим', 'Франция': 'Париж', 'Азербайджан': 'Баку'} + if cont in d: + return d[cont] + else: + return 'NO' + +def change(): + d = {'Россия': 'Москва','Германия' : 'Берлин','Италия':'Рим', 'Франция': 'Париж', 'Азербайджан': 'Баку'} + d1 = {} + for key in d: + city = d[key] + d1[city] = key + return d1 + +def delete_doubles(): + d = { 'Петя': 12345, 'Пётр': 12345, 'Аня': 54321, 'Анна': 54321, 'Сёма': 13579} + arr = [] + d1 = {} + for key in d: + if d[key] in arr: + pass + else: + append + d1[key] = d[key] + return d1 +print(delete_doubles()) +import re +def open_text(): + with open('Жирафики.txt', 'r', encoding = 'utf - 8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for i, w in enumerate(arr): + arr[i] = arr[i].strip(',.?!-') + return arr +def giraf(): + s = input('Введите что-нибудь ') + regex = 'жираф(а(ми?|х)?|у|е|о[мв]|ами|ы)?' 
+ m = re.search(regex,s) + if m != None: + return 'Я нашёл' +print(giraf()) +def giraf_in_text(): + arr = open_text() + regex = r'\bжираф(а(ми?|х)?|у|е|о[мв]|ами|ы)?\b' + m = re.search(regex,i[arr]) + s = 0 + for i[arr] in arr: + if m != None: + s += 1 + return s +print(giraf_in_text()) +import re +def open_s(): + with open ('Динозавры — Википедия.html','r', encoding = 'utf - 8')as f: + text = f.read() + return text +def find_dino(): + text = open_s() + reg = r'\b[Дд]инозавр[а-я]{0,5}' + m = re.findall(reg, text) + return m +print (find_dino()) +def no_html(): + text = open_s() + m = re.sub(u'<.*?>', u'', text, flags = re.DOTALL) + return m +print (no_html()) +def cat_dino(): + m = no_html() + n = re.sub(r'\bдинозавр',r'\bкот',m,flags = re.DOTALL) + n1 = re.sub(r'\bДинозавр',r'\bКот',n, flags = re.DOTALL) + return n1 +print(cat_dino()) +import re +def open_text(): + with open('Гоголь.txt', 'r', encoding = 'utf - 8') as f: + text = f.read() + text = re.sub(r'\n',' ', text) + arr = re.split('\.|\?|\! ', text) + for i, s in enumerate(arr): + arr[i] = re.sub(r'[:;,.?!— -]',' ', arr[i]) + return arr +def words_5(): + arr = open_text() + for i in range(len(arr)): + arr1 = arr[i].split() + template = '{}_{}' + length =[template.format(arr1[i],len(arr1[i])) for i,w in enumerate(arr1)] + for i in range(len(length)): + print(length[i]) + return +words_5() +import re +def open_text(): + with open('Гоголь.txt', 'r', encoding = 'utf - 8') as f: + text = f.read() + text = re.sub(r'\n',' ', text) + arr = re.split('\.|\?|\! 
', text) + for i, s in enumerate(arr): + arr[i] = re.sub(r'[:;,.?!— -]',' ', arr[i]) + return arr +def words_5(): + arr = open_text() + for i in range(len(arr)): + arr1 = arr[i].split() + template = '{}_{}' + length ={print(template.format(arr1[i],len(arr1[i]))) for i,w in enumerate(arr1)} + return +words_5() +def open_text(): + with open('Austen_Jane.txt', 'r', encoding = 'utf - 8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for i, w in enumerate(arr): + arr[i] = arr[i].strip(',.”"?!-:;') + return arr +def words(): + arr = open_text() + arr1 = [] + for i,w in enumerate(arr): + if arr[i][-4:] == 'hood': + arr1.append(arr[i]) + return arr1 +def number_of_words(): + arr1 = words() + return len(arr1) +def the_minimum_frequency(): + arr = open_text() + arr1 = words() + y = 1 + n = 0 + for i,w in enumerate(arr1): + y = min(y, arr1.count(arr1[i])) + for i,w in enumerate(arr1): + if y == arr1.count(arr1[i]): + n = i + return arr1[n] +def base(): + arr1 = words() + arr2 = [] + for i,w in enumerate(arr1): + x = arr1[i].rfind('h') + arr2.append(arr1[i][:x]) + return arr2 +print(number_of_words()) +print(the_minimum_frequency()) +print(' '.join(map(str,(base())))) +arr = [] +word = input('Введите слово') +while word: + arr.append (word) + word = input( 'Введите слово') +for w in range(len(arr)): + print(arr[w][w+1::]) +word=input("Введите слово: ") +n=[] +for i in range(len(word)): + n=word[len(word)-i:] + n+=word[i:] + print(n) +print ("Введите число") +num=int(input()) +print ("Введите слово") +w=input() +while w!= "программирование" and w!= "программирование": + for i in range(num): + print (w) + print ("Введите число") + num=int(input()) + print ("Введите слово") + w=input() +print ("Конец") +w = [] +while True: + word =(input('Введите латинское слово: ')) + if len (word) ==0: break + elif word[-2:]== 're' or word [-2:]=='ri': + w.append(word) +for i in range (len(w)): + print (w[i]) +def open_text(name): + with open (name+'.txt', 'r', encoding 
='utf-8') as f: + text=f.read() + ntext=text.lower() + words=ntext.split(' ') + for i,word in enumerate (words): + words[i]=word.strip('.,!?-') + return words +def edwords(a): + ed=[] + edlist=int() + for i,word in enumerate (a): + if word.endswith('ed'): + ed.append(word) + edlist+=1 + print ('Количество форм на -ed равно',str(edlist)) + return (ed) +def iedwords(b): + iedlist=int() + for i,word in enumerate (b): + if word.endswith('ied'): + iedlist+=1 + print ('Количество форм, образованных от глаголов на -у или -е равно',str(iedlist)) + return () +def end(): + name=input('Введите название файла: ') + a=open_text(name) + b=edwords(a) + c=iedwords(b) + return (c) +u=end() +with open("text.txt", "r", encoding="utf-8") as f: + text=f.read() + words=text.split(' ') + words_num=len(words) + letters=list(text) + marks_num=int() + for i in letters: + if i=="." or i==",": + marks_num+=1 + percent=marks_num/words_num*100 + print('Процент слов, имеющих знак препинания: ', round(percent)) +import random +def bigram(): + b={} + with open ('text.csv', 'r') as f: + lines=f.readlines() + for line in lines: + line=line.split(',') + b[line[0]]=line[1] + return(b) +def dots(w): + res='' + for i in range(len(w)): + res+='. ' + return res +def rand(b): + k=list(b.keys()) + return random.choice(k) +print ('Сейчас мы сыграем в игру "Угадай слово"!') +big=bigram() +word=rand(big) +print ("Подсказка:") +print (big[word]+' '+ dots(big[word])) +answer=input('Как вы думаете, что это за слово? 
') +if answer==word: + print ("Правильно!") +else: + print ('Увы, неправильно!') +import os +import re +def sents(): + news = 'news' + sent = {} + for n in os.listdir(news): + with open(os.path.join(news, n), encoding='cp1251') as text: + text = text.read() + sent[n] = len(re.findall('', text)) + + return (sent) +def new (sent): + + with open('new_file', 'w', encoding = 'utf-8') as new: + for s in sent: + new.write(s+'\t'+str(sent[s])+'\n') +new(sents()) +import re +def oh(): + lines=int() + with open ('text.txt', 'r', encoding='utf-8') as first: + old_text=first.readlines() + for line in old_text: + if '' in line: + break + else: + lines+=1 + with open ('endtext.txt', 'w', encoding='utf-8') as second: + lines1=str(lines) + second.write(lines1) +def oops(): + slov={} + with open ('text.txt', 'r', encoding='utf-8') as first: + old_text=first.readlines() + for line in old_text: + if " 2) and (usl[2] == 'ед') and (usl[3] == 'жен'): + mass.append(arr[0]) + sum += float(arr[2]) + print(', '.join(mass)) + print('Сумма ipm = ', sum) +n=input('Введите любое число. ') +n=int(n) +while n != 0 : + sl=input('Введите любое слово. 
') + if sl == 'программирование': + break + print(sl) + n=n-1 +word = input("Введите слово: ") +for k in range(len(word)): + newword = (word[-k: ] + word[ :-k]) + print(newword) +import re +import os + +def sent_count(): + path = './news/' + for root, dirs, files in os.walk(path): + for f in files: + with open(os.path.join(root, f), 'r', encoding = 'cp1251') as t: + text = t.read() + mass = [] + mass = text.split('\n') + s_count = 0 + for i in mass: + if re.search('', i): + s_count += 1 + with open('result.txt', 'a', encoding = 'utf-8') as file: + file.write(f +'\t' + str(s_count) + '\n') + +def write_csv(): + with open('result.csv', 'w', encoding = 'utf-8') as file: + output = csv.writer(file, delimiter = ',') + head = ['Название файла', 'Автор', 'Тематика текста'] + path = './news/' + for root, dirs, files in os.walk(path): + for f in files: + with open(os.path.join(root, f), 'r', encoding = 'utf-8') as t: + text = t.read() + if re.search('', text): + auth = re.search('', text).group(1) +def main(): + sent_count() +main() +s = 8 +p = input ("введите число") +p = int (p) +while p!=s: + if p < s: + print ("больше") + else: + print ("меньше") + p = input ("ещё раз") + if len (p) == 0: + print ("всё") + break + p = int (p) +if p==s: + print ("вы выиграли") +print (range (10)) +import os +direct = 'D:\Downloads\news.zip\news' +files = os.listdir(direct) +for file in files: + f = open(file, 'r') + sent = 0 + for line in f: + if '. 
' in line: + sent += 1 + f.close() + f = open('sent.txt', 'a') + f.write(file, ' ', sent, '\n') + f.close() +A = [0] * 7 +for i in range(7): + A[i] = int(input("введите число")) +for i in range(7): + B = ["X"] * A[i] + if A[i] < 0: + print ("введено отрицательное число") + else: + print (''.join([str(i) for i in B])) +s = input ("введите число") +s = int (s) +for i in range (10): + i += 1 + p = i*s + print (i, "*", s, "=", p) +words = 0 +cap_words = 0 +for line in open('text.txt','r', encoding='utf-8'): + word = line.split(' ') + for word in open('text.txt','r', encoding='utf-8'): + words +=1 + if word.istitle(): + cap_words += 1 +s = (cap_words/words)*100 +print ("слов, начинающихся с заглавной буквы", s, "%") +a = input ("введите a") +b = input ("введите b") +c = input ("введите c") +a = int (a) +b = int (b) +c = int (c) +if c == a % b: + print ("a даёт остаток c при делении на b") +else: + print ("a НЕ даёт остаток c при делении на b") +if c == a/b: + print ("a разделить на b равно c") +else: + print ("a разделить на b НЕ равно c") +def linecount (text): + lines = 0 + for line in text: + lines += 1 + return lines +def freq (text): + A = dict() + for line in text: + if " 3): + dic[element] += 1 + elif len(element)> 3: + dic[element] = 1 + return dic +def delete (dic): + dic1=dic.copy() + for word in dic1: + if dic1[word] == 1: + del dic[word] + return dic +corpus_freq = freq_dict(words) +anek_freq = freq_dict(words_anek) +izvest_freq = freq_dict(words_izvest) +teh_freq = freq_dict(words_teh) +delete (corpus_freq) +delete(anek_freq) +delete(izvest_freq) +delete(teh_freq) +def pmi_for_cats(x, y): + if y == 'anek': + dic = anek_freq + arr = words_teh + words_izvest + num = num_anek + elif y == 'teh': + dic = teh_freq + arr = words_anek + words_izvest + num = num_teh + elif y == 'izvest': + dic = izvest_freq + arr = words_teh + words_anek + num = num_izvest + p_xy = dic[x]/len(arr) + p_x, p_y = corpus_freq[x]/len(words), num/(num_izvest + num_teh + num_anek) + pmi = 
log(p_xy/(p_x * p_y)) + return pmi +cat_pmi = {} +i = 0 +for word in corpus_freq: + if i > 100: + break + try: + pmi_anek = pmi_for_cats(word, 'anek') + except KeyError: + pmi_anek = 0 + try: + pmi_teh = pmi_for_cats(word, 'teh') + except KeyError: + pmi_teh = 0 + try: + pmi_izvest = pmi_for_cats(word, 'izvest') + except KeyError: + pmi_izvest = 0 + max_pmi = max(pmi_anek, pmi_teh, pmi_izvest) + if max_pmi == 0: + continue + if max_pmi == pmi_anek: + cat = 'anek' + elif max_pmi == pmi_teh: + cat = 'teh' + elif max_pmi == pmi_izvest: + cat = 'izvest' + print(word, cat) + i += 1 +word = input ("Введите слово: ") +a = 0 +b = len (word) +while word [a:b] != "": + print (word [a:b]) + a += 1 + b -=1 +A = [] +i = 0 +print ('Enter 8 words') +while i < 9: + list.append(A, input()) + i+=1 +i = 1 +while i < 9: + print (A[i-1]+A[i]) + i+=2 +a = int (input ("Введите число a: ")) +b = int (input ("Введите число b: ")) +c = int (input ("Введите число c: ")) +if (a/b == c): + print ("a / b = c") +else: print ("a / b != c") +if (a ** b == c): + print ("a ^ b = c") +else: print ("a ^ b != c") +def questions(): + file = input('Введите название файла на английском: ') + leng = int(input('Введите длину слова: ')) + quant = open_file(file) + output = perc(quant, leng) + return output +def open_file(file): + f = open(file, 'r') + file = f.read() + file = file.split() + return file +def perc(quant, leng): + i = 0 + j = 0 + for item in quant: + if item.startswith('un'): + i += 1 + if len(item) > leng: + j += 1 + if i != 0: + print ('Количество слов, начинающихся с un-, в тексте: ', i) + return round(j / i * 100) + else: + return 'В тексте нет слов, начинающихся на un-' +print('Проценты: ', questions()) +import random +def read (): + f = open('text.txt', 'r') + l = f.readlines() + return l +def array (numb): + a = read()[numb].split() + return a +def noun2 (): + return random.choice(array(0)) +def noun3 (): + return random.choice(array(1)) +def noun4 (): + return random.choice(array(2)) 
+def imper2 (): + return random.choice (array(3)) +def imper3 (): + return random.choice(array(4)) +def imper4 (): + return random.choice(array(5)) +def verb2 (): + return random.choice(array(6)) +def verb3 (): + return random.choice(array(7)) +def verb4 (): + return random.choice(array(8)) +def adverb1 (): + return random.choice (array(9)) +def adverb2 (): + return random.choice (array(10)) +def adverb3 (): + return random.choice (array(11)) +def adverb4 (): + return random.choice (array(12)) +def punct(): + marks = [".", "?", "!", "..."] + return random.choice(marks) +def verse_5_1 (): + return imper3() + ' ' + noun2() + punct() +def verse_5_2 (): + return imper2() + ' ' + noun3() + punct() +def verse_5_3 (): + return verb2() + ' ' + noun3() + punct() +def verse_5_4 (): + return verb3() + ' ' + noun2() + punct() +def verse_5_5 (): + return adverb1() + ' ' + verb2() + ' ' + noun2() + punct() +def verse_5_6 (): + return adverb1() + ' ' + imper4() + punct() +def verse_5_7 (): + return adverb2() + ' ' + imper3() + punct() +def verse_5_8 (): + return adverb3() + ' ' + imper2() + punct() +def verse_7_1 (): + return imper3() + ' ' + noun4() + punct() +def verse_7_2 (): + return imper4() + ' ' + noun3() + punct() +def verse_7_3 (): + return verb3() + ' ' + noun4() + punct() +def verse_7_4 (): + return verb4() + ' ' + noun3() + punct() +def verse_7_5 (): + return adverb1() + ' ' + verb3() + ' ' + noun3() + punct() +def verse_7_6 (): + return adverb1() + ' ' + verb4() + ' ' + noun2() + punct() +def verse_7_7 (): + return adverb1() + ' ' + verb2() + ' ' + noun4() + punct() +def verse_7_8 (): + return adverb2() + ' ' + verb2() + ' ' + noun3() + punct() +def verse_7_9 (): + return adverb2() + ' ' + verb3() + ' ' + noun2() + punct() +def make_verse_5 (): + verse = random.choice([1,2,3, 4, 5, 6, 7, 8]) + if verse == 1: + return verse_5_1() + elif verse == 2: + return verse_5_2() + elif verse == 3: + return verse_5_3() + elif verse == 4: + return verse_5_4() + elif verse == 5: + 
return verse_5_5() + elif verse == 6: + return verse_5_6() + elif verse == 7: + return verse_5_7() + else: + return verse_5_8() +def make_verse_7 (): + verse = random.choice([1,2,3, 4, 5, 6, 7, 8, 9]) + if verse == 1: + return verse_7_1() + elif verse == 2: + return verse_7_2() + elif verse == 3: + return verse_7_3() + elif verse == 4: + return verse_7_4() + elif verse == 5: + return verse_7_5() + elif verse == 6: + return verse_7_6() + elif verse == 7: + return verse_7_7() + elif verse == 8: + return verse_7_8() + else: + return verse_7_9() +print(make_verse_5()) +print(make_verse_7()) +print(make_verse_5()) +print(make_verse_7()) +print(make_verse_7()) +num = int (input ("Введите натуральное чиcло: ")) +i = 1 +while 2**i < num: + print (2**i) + i+=1 + + +import re +def read_file(): + with open ('corp.txt', 'r', encoding='UTF-8') as file: + text=file.read() + file.close() + return text +def counter(): + file = open('corp.txt', 'r', encoding='UTF-8') + i=1 + for line in file: + if '' not in line: + i+=1 + else: + break + file.close() + return i +def five_points(): + new_file=open('подсчет строк.txt', 'w', encoding='utf-8') + text=new_file.write('Число строк заголовка: ' + str(counter())) + new_file.close() +def dictionary(): + d={} + wordlist=[] + lemmas=re.findall('>\w+', read_file()) + for lemma in lemmas: + lemma=lemma.strip('>') + wordlist.append(lemma) + for word in wordlist: + if word in d: + d[word]+=1 + else: + d[word]=1 + return d +def eight_points(): + d=dictionary() + dic_file=open('словарик.txt', 'w', encoding='utf-8') + for key in d: + text=dic_file.write(key+' - '+ str(d[key])+' \n') + dic_file.close() +def ten_points(): + formlist=[] + file = open('corp.txt', 'r', encoding='UTF-8') + for line in file: + pronom=re.search('type="(f.h.+?)"', line) + if pronom != None: + find=pronom.group(1) + formlist.append(find) + return formlist +five_points() +eight_points() +print ('Загляните в папку с программой и попробуйте найти в ней новые txt-файл.') +print 
(ten_points())import os + + + + +def lists_creator(): + aaa=[] + punct=[] + file_list=[files for root, dirs, files in os.walk('/home/lera/Рабочий стол/Загрузки')] + for folder in file_list: + for file in folder: + q_a=0 + q_punct=0 + for word in file: + for letter in word: + if letter=='a' or letter =='A' or letter =='А' or letter =='а': + q_a+=1 + if letter =='.' or letter ==',' or letter =='?' or letter =='!' or letter =='(' or letter == ')' or letter =='-': + q_punct+=1 + if q_a>3: + aaa.append(file) + if q_punct-1>0: + punct.append(file) + print ('+++++++++++++Файлы, в которых большк 3х "а":+++++++++++++') + for el in aaa: + print (el) + print ('+++++++++++++Файлы со знаками препинания в названии:+++++++++++++') + for el in punct: + print(el) + + + + + + + + + + +def kracuvo(): + for roots, dirs, files in os.walk('/home/lera/Рабочий стол/Загрузки'): + for dir in dirs: + print('--',dir) + path='/home/lera/Рабочий стол/Загрузки'+'/'+str(dir) + for file in os.listdir(path): + print (' ', file) + + +print(os.path.join('дз ап', 'morozova3.docx'))import os +import re +def s_counter_5(): + s_result = open('res.txt', 'w', encoding='utf-8') + for root, dirs, files in os.walk('news'): + s_result = open('res.txt', 'w', encoding='utf-8') + for file in files: + with open(os.path.join('news',file), 'r') as f: + file_text = f.read() + q=0 + for line in file_text: + if line=='.' or line=='?' 
or line=='!': + q+=1 + text=s_result.write(file +'\t'+str(q)+ '\n') + s_result.close() + return s_result +def table_8(): + table = open('table.csv', 'w', encoding='utf-8') + for root, dirs, files in os.walk('news'): + for file in files: + with open(os.path.join('news',file), 'r') as f: + file_text = f.read() + info=re.findall('.+', file_text) + for el in info: + a=re.search('>([a-яА-Я]+.[a-яА-Я]+)?\.', el) + if a!=None: + author=a.group(1) + else: + author='no author' + text=table.write(author+'\n') + + + + +s_counter_5 +table_8() + + +import re +def file_name(): + print ('Поместите файл в одну папку с данной программой.\nВведите имя файла, чтобы получить список словоформ:') + name=input() + return name +def read_file(): + wordlist=[] + file = open(file_name(), 'r', encoding='UTF-8') + for line in file: + linelist=line.split() + for word in linelist: + word=word.lower() + word=word.strip('.,:;"«»-?()!') + wordlist.append(word) + file.close() + return wordlist +def form_finder(): + form_list=[] + for word in read_file(): + form=re.search('(не(до)?|под)?вып[еиь]([йтлеюи]|(вш))[мшьаоиыуе]?(го|м(у|и)?[ейяюх])?(ся)?', word) + if form!=None: + find=form.group() + form_list.append(word) + return form_list +def list_without_repetitions(): + list=form_finder() + for el in list: + el_new=el + for el in list: + if el_new==el: + list.remove(el) + return list +for el in list_without_repetitions(): + print(el)print ('Введите число') +n=int(input()) +for i in range(n): + print('Введите слово') + a=input() + print ('Ваше слово:', a) + if a=='программирование': + break +print ('Цикл завершен')f=open('wordlist.txt', 'r', encoding='utf-8') +for line in f: + arr = line.split() + for i,word in enumerate(arr): + arr[i] = word.strip('.,?!;:-"') + for el in arr: + el=el.lower() + print (el) + + + + +import random +def open_file(): + file = open('wordlist.txt', 'r', encoding='UTF-8') + lines = file.readlines() + file.close() + return lines +def random_word(lines): + ugly_word = 
random.choice(lines) + word = ugly_word.strip('\n') + return word +def syllable_counter(word): + syl_quan=0 + for letter in word: + if letter=='e' or letter=='y' or letter=='u' or letter=='i' or letter=='o' or letter=='a' or letter=='é' or letter=='è' or letter=='ê' or letter=='à' or letter=='â' or letter=='ù' or letter=='û' or letter=='ô' or letter=='î': + syl_quan+=1 + return syl_quan +def line_creator(syl_number): + syl_max = syl_number + line = '' + while syl_max >= 0: + word = random_word(open_file()) + syl_quan = syllable_counter(word) + syl_max -= syl_quan + if syl_max > 0: + line=line+' '+ word + continue + elif syl_max == 0: + line = line + ' ' + word + break + elif syl_max < 0: + line = '' + syl_max = syl_number + continue + punctuation=['!','.','?'] + phrase=line[1].upper()+line[2:]+random.choice(punctuation) + print (phrase) +def main(): + print('\nThere you can see one more perfect creation:\n') + line_creator(5) + line_creator(7) + line_creator(5) +if __name__ == '__main__': + main() + + + + +import os +import re +for item in files: + file_name=item.split('.') + if len(file_name[0])==5: + lat=re.search('[A-Za-z]{5}', file_name[0]) + if lat!=None: + i+=1 + if file_name[0] not in name_base: + name_base.append(file_name[0]) +print ('Число файлов с названием из пяти латинских символов: ',i) +print ('\nСписок названий найденных файлов (без повторов):') +for el in name_base: + print (el) + + +import os +dirlist = [el for root, dirs, files in os.walk('.') for el in dirs] +stat = {} +letters = 'qwertyuiopasdfghjklzxcvbnmйцукенгшщзхъфывапролджэячсмитьбю' +letter = [name[0].lower() for name in dirlist] +for el in letter: + if el not in letters: + letter.remove(el) + if el in stat: + stat[el] += 1 + else: + stat[el] = 1 +i = 0 +res = 0 +for value in stat: + if stat[value] > i: + i = stat[value] + res = value +if i==0: + print ('Названий, начинающихся с букв, похоже, тут нет :(') +else: + print('Чаще всего названия папок начинаются с буквы:', res, '\nТакие 
названия встречаются', i, 'раз(a)') +word=input('Введите слово: ') +if word: + for i in range(len(word)): + print (word[i:]+word[:i]) + if i>len(word)-1: + break +else: + print ('Нет входных данных')def read_words(): + wordlist=[] + file = open('austen.txt', 'r', encoding='UTF-8') + for line in file: + linelist=line.split() + for word in linelist: + wordlist.append(word) + file.close() + return wordlist +def counter(part): + quan=0 + for word in read_words(): + if word[-len(part):]==part: + quan+=1 + return quan +print ('Число форм в данном тексте, оканчивающихся на -ed: ',counter('ed')) +print ('Из них - правильные глаголы в прошедшем времени на -y:',counter('ied'))import re +def file_name(): + print ('Поместите файл в одну папку с данной программой.\n��ведите имя файла, чтобы получить список cфер деятельности данного ученого:') + name=input() + return name +def reader(): + list=[] + file = open(file_name(), 'r', encoding='UTF-8') + for line in file: + line=line.strip('\n') + list.append(line) + file.close() + return list +def str_sphere(): + infobox=reader() + sphere='' + q=0 + for line in infobox: + if 'Научная сфера:' in line: + sphere=infobox[q+2] + break + else: + q+=1 + return sphere +def main(): + form=re.findall('>[а-я -]+', str_sphere()) + list='' + for el in form: + el=el.strip('>.+',i) + form2=re.findall('ana',i) + for el in form2: + num_ana+=1 + if form1!=None: + num_w+=1 + koef=num_ana/num_w + return koef + + + +def freq_dict_8(): + d={} + list=[] + new_list=[] + for i in reader(): + form=re.search('gr="(.+)"',i) + if form!=None: + list.append(form.group(1)) + for el in list: + i = el.split(',') + new_list.append(i) + keys=[item[0].strip('=qwertyuiopasdfghjklzxcvbnm/<>" ') for item in new_list if item!='NUM=nom" /> 0 : + print('X'*nlist[i]) + else: + print('') + i += 1 + + + + + + + + + + + +import re +def openfile_lines(fname): + with open(fname, 'r', encoding = 'utf-8') as f: + lines = f.readlines() + return lines +def find_words(lines): + words = 
[] + for i in range(len(lines)): + if re.search('(.+?)<', words[i]): + found_lemma = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(1) + found_type = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(2) + found_form = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(3) + pure.append([found_lemma, found_type, found_form]) + return pure +def count_forms(words): + freq = {} + for i in range(len(words)): + form = re.search('type="(.+?)"', words[i]).group(1) + if form not in freq: + freq[form] = 1 + else: + freq[form] += 1 + return freq +def plural_adjectives(freqs): + forms = list(freqs.keys()) + pluradj = [] + for i in range(len(forms)): + if re.search('l.f.*', forms[i]): + adj_form = re.search('l.f.*', forms[i]).group() + if adj_form: + pluradj.append(adj_form) + pluradj_freq = {} + for i in range(len(pluradj)): + pluradj_freq[pluradj[i]] = freqs[pluradj[i]] + return pluradj_freq +def main(): + lines_dict = openfile_lines('dict.txt') + word_list = find_words(lines_dict) + pure_info = purify_info_about_words(word_list) + freq_dict = count_forms(word_list) + pluradj_freq_dict = plural_adjectives(freq_dict) + with open('lines.txt', 'w', encoding = 'utf-8') as f: + f.write(str(len(lines_dict))) + with open('word forms.txt', 'w', encoding = 'utf-8') as f: + f.write('\n'.join(freq_dict.keys())) + with open('plural adjectives frequencies.txt', 'w', encoding = 'utf-8') as f: + text = '' + for key in pluradj_freq_dict: + text += str(key)+' '+str(pluradj_freq_dict[key])+'\n' + f.write(text) + with open('dictionary.csv', 'w', encoding='utf-8') as f: + header = ['лемма', 'грамматическая форма', 'словоформа'] + f.write(','.join(header)+'\n') + for i in range(len(pure_info)): + f.write(','.join(pure_info[i])+'\n') +if __name__ == '__main__': + main() + + + + + + + +import csv +def main(): + clues = {} + with open('clues.csv', 'r', encoding='utf-8') as f: + text = csv.reader(f, delimiter=',') + for row in text: + 
clues[row[0]] = row[1] + n = 0 + keys = list(clues.keys()) + while n < len(clues): + i = 0 + while i <= len(keys[n]): + if i < len(keys[n]): + response = input(keys[n]+'...') + if response == clues[keys[n]]: + print('Правильно!') + n += 1 + break + else: + print('Неправильно. У тебя ещё '+str(len(keys[n]) - i+1)+' попыток.') + i += 1 + elif i == len(keys[n]): + response = input(keys[n]+'...') + if response == clues[keys[n]]: + print('Правильно!') + n += 1 + break + else: + print('У тебя закончились попытки. Правильный ответ: '+keys[n]+' '+clues[keys[n]]) + n += 1 +if __name__ == '__main__': + main() + + + + + + + + + + +n = int(input('Введите целое положительное число.')) +index = 0 +while index < n: + index +=1 + word = input('Введите слово.') + if word == 'программирование': + break + print(word) + + + + + + +word = input('Введите слово в русской раскладке.') +index = 0 +while index < len(word): + index += 1 + if word[len(word) - index] != 'з' and word[len(word) - index] != 'я': + print(word[len(word) - index]) + + + + + + + + + + + + + + + +import os +import re +import csv +def open_file_texts(directory): + raw_texts_dict = {} + for root, dirs, files in os.walk(directory): + for f in files: + with open(os.path.join(root, f), 'r', encoding='windows-1251') as t: + text = t.read() + raw_texts_dict[f] = text + return raw_texts_dict +def get_sentences(text): + sentences = re.findall('(.|\n)+?', text) + return sentences +def write_out_count_sentences(file_texts_dict): + with open('amount of sentences.txt', 'w', encoding='utf-8') as f: + for filename in file_texts_dict: + text = file_texts_dict[filename] + sent_am = len(get_sentences(text)) + f.writelines(filename+'\t'+str(sent_am)+'\n') +def get_words(raw_text): + + word_list = [] + raw_lines = raw_text.split() + word_lines = re.findall('(.+?)((?:\n?[«»,.! 
\?\-])*)', raw_text) + for i in range(len(word_lines)): + line = word_lines[i][0].strip('').strip('') + ana = ana.strip('>').strip().strip('ana').strip() + word_list.append([word] + [word_lines[i][1].strip().strip(' ')] + [ana]) + return word_list +def create_clear_text_out_of_words(word_list): + text = [] + for el in range(len(word_list)): + word = word_list[el] + d = re.match('\d+', word[2]) + if '«' in word[2]: + text.append(word[0] + ' «') + elif d: + text.append(word[0] + ' ' + d.group(0) +' ') + else: + text.append(word[0] + word[2] + ' ') + return text +def find_file_meta (file_texts_dict): + file_meta_list = [] + for filename in file_texts_dict: + text = file_texts_dict[filename] + author = re.search('', text) + if author: + author = re.search('', text).group(1) + topic = re.search('', text) + if topic: + topic = re.search('', text).group(1) + file_meta_list.append([filename, author, topic]) + return file_meta_list +def write_out_file_meta (file_meta_list): + with open('file metadata.csv', 'w', encoding='utf-8') as n: + text = csv.writer(n, delimiter=';') + header = ['Название файла', 'Автор', 'Тематика текста'] + text.writerow(header) + for row in file_meta_list: + text.writerow(row) +def find_spec_bigr_in_sentence(word_list): + spec_bigr = [] + for i in range(len(word_list)): + word = word_list[i] + if i > 0: + previous_word = word_list[i-1] + if 'loc' in word[2] and 'PR' in previous_word[2]: + spec_bigr.append(previous_word[0]+' '+word[0]) + return spec_bigr +def find_all_spec_bigr(raw_texts_dict): + sbec_bigr = [] + texts = raw_texts_dict.values() + for text in texts: + sentences = get_sentences(text) + for sentence in sentences: + sentence_word_list = get_words(sentence) + sentence_spec_bigr = find_spec_bigr_in_sentence(sentence_word_list) + context = create_clear_text_out_of_words(sentence_word_list) + for bigr in sentence_spec_bigr: + sbec_bigr.append([bigr, context]) + return sbec_bigr +def write_out_spec_bigr(spec_bigr): + with open('bigrams.txt', 
'w', encoding='utf-8') as f: + for bigr in spec_bigr: + f.writelines(bigr[0]+'\t'+bigr[1]+'\n') +def main(): + raw_texts_dict = open_file_texts('news') + write_out_count_sentences(raw_texts_dict) + file_meta = find_file_meta(raw_texts_dict) + write_out_file_meta(file_meta) + spec_bigr = find_all_spec_bigr(raw_texts_dict) + write_out_spec_bigr(spec_bigr) +if __name__ == '__main__': + main() + +words = [] +with open('words.txt','r', encoding = 'utf-8') as f: + text = f.read() + words = text.split('\n') +for i in range(len(words)): + if ' союз ' in words[i]: + print(words[i]) + +words = [] +with open('words.txt','r', encoding = 'utf-8') as f: + text = f.read() + words = text.split('\n') +feminin = [] +ipm = 0 +word = '' +gram = '' +ipmi = '' +for i in range(len(words)): + if 'сущ' in words[i] and 'жен' in words[i]: + feminin.append(words[i]) + word, gram, ipmi = words[i].split('|') + ipm += float(ipmi) +for i in range(len(feminin)): + print(feminin[i]+',') +print(ipm) + + +words = [] +with open('words.txt','r', encoding = 'utf-8') as f: + text = f.read() + words = text.split('\n') +words1 = [] +word = input('Print any russian word. ') +while word: + words1.append(word) + word = input('Print any russian word. ') +for i in range(len(words1)): + check = 0 + for x in range(len(words)): + if words[x].count('|') == 2: + word, gram, ipmi = words[x].split('|') + if words1[i] == word.strip(' '): + print('grammar:', gram.strip(' ')+',' , 'ipm =', float(ipmi)) + check = 1 + if check == 0: + print('This word was not find in the dictionary.') + + + + + + + + + + + + +text = input('Type something: ') +for i in range(len(text)): + print(text[i:]+text[:i]) + + + + + +import re +def match_verb_forms(line): + infinitive = re.match(r'программировать(ся)?', line, re.I) + future = re.match(r'буд(е(шь|те?|м)|ут?) 
программировать', line, re.I) + present = re.match(r'программиру(ю|(е(те?|м|шь)))', line, re.I) + past = re.match(r'программировал(а|и)?', line, re.I) + past_participle = re.match(r'программированн(ая|о(е|й|му?|го)|ы(й|е|ми?|х))', line, re.I) + present_participle = re.match(r'программируем(ая|о(е|й|му?|го)|ы(й|е|ми?|х))', line, re.I) + transgressive_active = re.match(r'программируя', line, re.I) + transgressive_passive_past = re.match(r'будучи программированн(ая|о(е|й|му?|го)|ы(й|е|ми?|х))', line, re.I) + transgressive_passive_present = re.match(r'будучи программируем(ая|о(е|й|му?|го)|ы(й|е|ми?|х))', line, re.I) + if infinitive and not future: + match = infinitive + elif future: + match = future + elif present: + match = present + elif past: + match = past + elif past_participle: + match = past_participle + elif present_participle: + match = present_participle + elif transgressive_active: + match = transgressive_active + elif transgressive_passive_past and not past_participle: + match = transgressive_passive_past + elif transgressive_passive_present and not present_participle: + match = transgressive_passive_present + else: + match = None + return match +def open_forms(fname): + forms = [] + with open (fname, 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.lower() + forms = text.split() + for i in range(len(forms)): + forms[i] = forms[i].strip('.,?*()«»') + return forms +def main(): + matches = [] + forms = open_forms('test.txt') + for i in range(len(forms)-1): + if i < len(forms): + if match_verb_forms(forms[i] +' '+ forms[i+1]): + if match_verb_forms(forms[i] +' '+ forms[i+1]).group() not in matches: + matches.append(match_verb_forms(forms[i] +' '+ forms[i+1]).group()) + else: + if match_verb_forms(forms[i]): + if match_verb_forms(forms[i]).group()not in matches: + matches.append(match_verb_forms(forms[i]).group()) + print(*matches) +if __name__ == '__main__': + main() + + + +import re +import os +import shutil +flist = os.listdir(os.getcwd()) 
+clist = [] +cfcount = 0 +for n in flist: + cyrillic = 1 + name = n.split('.')[0] + for let in name: + if not re.match('[А-Яа-яЁё]',let): + cyrillic = 0 + if cyrillic == 1: + if os.path.isdir(n): + cfcount += 1 + if name not in clist: + clist.append(name) +print(cfcount) +print(clist) + + + + + + + + + + + + + +import re +import csv +def open_file(name): + with open(name, 'r', encoding='utf-8') as f: + file_text = f.read() + return file_text +def get_words(raw_text): + word_arr = [] + raw_lines = raw_text.split() + word_lines = re.findall('(.+)((?:\n?[«»,.! \?\-])*(?:\n?[01234567])*)', raw_text) + for i in range(len(word_lines)): + line = word_lines[i][0].strip('').strip(' 0: + line[e] = line[e].strip(' />') + word_arr.append([line[0]] + [len(line)-1] + [word_lines[i][1].strip().strip(' ')] + line[1:]) + return word_arr +def count_average_anas(word_arr): + total = 0 + average = 0 + for i in range(len(word_arr)): + total += word_arr[i][1] + average = total/len(word_arr) + return average +def count_all_pos(word_arr): + pos_dict = {} + for i in range(len(word_arr)): + for el in range(len(word_arr[i])): + if el > 2: + pos = re.search('gr="(\w+)', word_arr[i][el]).group(1) + if pos not in pos_dict: + pos_dict[pos] = 1 + else: + pos_dict[pos] += 1 + with open('parts of speech frequency.txt', 'w', encoding='utf-8') as f: + for pos in pos_dict: + f.writelines(pos+'\t'+str(pos_dict[pos])+'\n') + return pos_dict +def make_text(word_arr): + text = [] + for el in range(len(word_arr)): + word = word_arr[el] + d = re.match('\d+', word[2]) + if '«' in word[2]: + text.append(word[0] + ' «') + elif d: + text.append(word[0] + ' ' + d.group(0) +' ') + else: + text.append(word[0] + word[2] + ' ') + return text +def find_all_instr(word_arr, text): + instr_words_dict = {} + for n in range(len(word_arr)): + word = word_arr[n] + for i in range(len(word)): + if i > 1: + instr = re.search('ins', word[i]) + if instr: + if word[0] not in instr_words_dict: + instr_words_dict[word[0]] = [n] + 
print(instr_words_dict[word[0]]) + elif n not in instr_words_dict[word[0]]: + instr_words_dict[word[0]] += [n] + print(instr_words_dict[word[0]]) + with open('words in instrumentalis.txt', 'w', encoding='utf-8') as f: + for word in instr_words_dict: + for i in range(len(instr_words_dict[word])): + x = instr_words_dict[word][i] + y = min(x+4, len(text)-1) + f.writelines(''.join(text[x-3:x])+'\t'+word+'\t'+''.join(text[x+1:y])+'\n') + return instr_words_dict +def main(): + raw_text = open_file('text.xml') + word_arr = get_words(raw_text) + average_anas = count_average_anas(word_arr) + print(average_anas) + count_all_pos(word_arr) + find_all_instr(word_arr, make_text(word_arr)) +if __name__ == '__main__': + main() + + + + + + + +import re +def open_text_phrases(fname): + phrases = [] + with open (fname, 'r', encoding = 'utf-8') as f: + text = f.read() + text = re.sub('\.\.\.|[\.\?]', '!', text) + phrases = text.split('!')[:-1] + for i in range(len(phrases)): + phrases[i] = re.sub('[<>\*\.«»,\'\"]','', phrases[i]) + phrases[i] = phrases[i].strip() + return phrases +def main(): + phrase_list = open_text_phrases('text.txt') + word_length_list = [[w, len(w)] for phrase in phrase_list for w in phrase.split()] + template = '{}_{}' + for word in word_length_list: + print(template.format(word[0], word[1])) +if __name__ == '__main__': + main() + + + + + + + + + +import re +import csv +def openforms(text): + forms = [] + text = text.lower() + forms = text.split() + for i in range(len(forms)): + forms[i] = forms[i].strip('.,?*()«»!\'\":; ') + return forms +def freqlist(forms): + freqs = {} + for i in range(len(forms)): + if forms[i] not in freqs: + freqs[forms[i]] = 1 + else: + freqs[forms[i]] +=1 + return freqs +def freqlist_to_csv(freqs): + with open('freq.csv', 'w', encoding='utf-8') as f: + output = csv.writer(f, delimiter=',') + header = ['слово', 'частота'] + output.writerow(header) + for key in sorted(freqs): + output.writerow([key, freqs[key]]) +def agosforms(text): + 
agos = re.findall('(?:(?:[А-Яа-яіѢѣЁё])+[\s,.!\?:;"\(\)\'»\n\t—]+?){3}[А-Яа-яiѢѣ]+?аго [А-Яа-яiѢѣ]+?(?:а|и)[\s,.!\?:;"\(\)\'»\n\t—]{,5}(?:[А-Яа-яiѢѣ]+?[\s,.!\?;:—"\(\)\'»\n\t]+?){3}',text) + with open('agos.txt', 'w', encoding='utf-8') as f: + output = f.write('\n'.join(agos)) +def main(): + with open ('Лесков.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + forms = openforms(text) + print(len(forms)) + freqs = freqlist(forms) + freqlist_to_csv(freqs) + agosforms(text) +if __name__ == '__main__': + main() + + + + + +import re +def main(): + with open('cats.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + mark_dogs = re.sub('([Сс]обак(?:а(?:х|ми?)?|и|е|у|о(?:й|ю))?)([\s,.!\?:"\(\)\'»\n\]\[-])', '<<<тут было слово \\1>>> \\2', text) + catstodogs = re.sub('([\s,.!\?:"\(\)\'«\n-])коше?к(а(?:х|ми?)?|и|е|у|о(?:й|ю))?([\s,.!\?:"\(\)\'»\n-\]\[])', '\\1собак\\2\\3', mark_dogs) + CatstoDogs = re.sub('([\s,.!\?:"\(\)\'«\n-])Коше?к(а(?:х|ми?)?|и|е|у|о(?:й|ю))?([\s,.!\?:"\(\)\'»\n-\]\[])', '\\1Собак\\2\\3', catstodogs) + dogstocats = re.sub('<<<тут было слово собак(а(?:х|ми?)?|и|е|у|о(?:й|ю))>>>', 'кошк\\1', CatstoDogs) + dogstocats2 = re.sub('<<<тут было слово собак>>>', 'кошек', dogstocats) + DogstoCats = re.sub('<<<тут было слово Собак(а(?:х|ми?)?|и|е|у|о(?:й|ю))>>>', 'Кошк\\1', dogstocats2) + DogstoCats2 = re.sub('<<<тут было слово Собак>>>', 'Кошек', DogstoCats) + catishtodogish = re.sub('кошач(ь(?:и(?:ми?|х)?|е(?:му|го|й)|я|ю)?|ий)', 'собач\\1', DogstoCats2) + CatishtoDogish = re.sub('Кошач(ь(?:и(?:ми?|х)?|е(?:му|го|й)|я|ю)?|ий)', 'Собач\\1', catishtodogish) + kittenstopyppies = re.sub('котята','щенята', CatishtoDogish) + KittenstoPyppies = re.sub('Котята','Щенята', kittenstopyppies) + kittentopyppy = re.sub('кот(?:е|ё)н(ок|ку)','щен\\1', KittenstoPyppies) + KittentoPyppy = re.sub('Кот(?:е|ё)н(ок|ку)','Щен\\1', kittentopyppy) + print(KittentoPyppy) +if __name__ == '__main__': + main() + + +import re +def main(): + with open('dates.txt', 'r', encoding = 
'utf-8') as f: + text = f.read() + dates = re.findall('(?:(?:0|1|2)|3(?:0|1))[0-9]\.(?:0|1(?:1|2)?)[0-9]\.[0-9]{2}', text) + print(*dates) +if __name__ == '__main__': + main() + + +import re +def main(): + with open('aphasy.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.lower() + clear = re.sub('(\w+)(?:,?|\.*?) \\1', '\\1', text) + while re.sub('(\w+)(?:,?|\.*?) \\1', '\\1', clear) != clear: + clear = re.sub('(\w+)(?:,?|\.*?) \\1', '\\1', clear) + print(clear) +if __name__ == '__main__': + main() + +import re +def three_consonants(text): + cons3 = re.findall('[^\s,.!\?:"\(\)\'«»\nйцкнгшщзхфвпрлджчсмтб]*?[йцкнгшщзхфвпрлджчсмтб]{3}[^\s,.!\?:"\(\)\'«»\nйцкнгшщзхфвпрлджчсмтб]*?[^\s,.!\?:"\(\)\'«»\n]*?[\s,.!\?:"\(\)\'»\n]', text, re.I) + for i in range(len(cons3)): + cons3[i] = cons3[i].strip('\s,.!\?:"\(\)\'»\n\t ') + return cons3 +def startwith(text): + abcs = re.findall(r'\b(?:а|о)(?:б|в).+?[\s,.!\?:"\(\)\'»\n]', text, re.I) + for i in range(len(abcs)): + abcs[i] = abcs[i].strip('\s,.!\?:"\(\)\'»\n\t ') + return abcs +def proper_nouns(text): + proper = re.findall('[а-яёa-z0-9] [А-ЯЁA-Z][а-яёa-z]+?[\s,.!\?:"\(\)\'»\n]' , text) + for i in range(len(proper)): + proper[i] = proper[i].split()[1] + proper[i] = proper[i].strip('\s,.!\?:"\(\)\'»\n\t ') + return proper +def analytical_future(text): + future = re.findall('буд(?:е(?:шь|те?|м)|ут?) 
.+?(?:а|е|и)ть(?:ся)?', text, re.I) + return future +def polysyllabic(text): + poly = re.findall(r'\b(?:[йцкнгшщзхфвпрлджчсмтб]*?[уеыаоюяиэ]){5,}[а-я]*?[\s,.!\?:"\(\)\'»\n]', text) + for i in range(len(poly)): + poly[i] = poly[i].strip('\s,.!\?:"\(\)\'«»\n\t ') + return poly +def roman_num(text): + rawroman = re.findall('\sC?M*?C?D?L?C{,4}X?L?I?X{,4}I?V?I{,4}\s', text) + roman = [] + for i in range(len(rawroman)): + rawroman[i] = rawroman[i].strip('\s,.!\?:"\(\)\'«»\n\t ') + if rawroman[i]: + roman.append(rawroman[i]) + return roman +def main(): + with open('text.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + + + + + + +if __name__ == '__main__': + main() + + + +import re +def clean(html): + noscript = re.sub(']*?>[^<>]*?', '', html) + nostyle = re.sub(']*?>[^<>]*?', '', noscript) + nospan = re.sub(']*?>[^<>]*?', '', nostyle) + notags = re.sub('<[^>]*>', '', nospan) + notags1 = re.sub('{[^}]*}', '', notags) + text = re.sub('[&][^;]*;', ' ', notags1) + text = re.sub(r'\s+', ' ', text) + return text +def html(text): + tags = re.findall(r'<[^>]*?>', text) + return tags +def main(): + with open('schizo.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + with open('html.txt', 'w', encoding = 'utf-8') as f: + output = f.write('\n'.join(html(text))) + with open('pure.txt', 'w', encoding = 'utf-8') as f: + output = f.write(clean(text)) +if __name__ == '__main__': + main() + + + +import re +def main(): + given = input('Введите свой телефонный номер: ') + right = re.search('\+7 \([0-9]{3}\) [0-9]{3}-[0-9]{2}-[0-9]{2}', given) + if right: + print('Введённый номер совпадает с шаблоном +7 (ХХХ) ХХХ-ХХ-ХХ.') + if re.search('\(9(?:2|3)', given): + print('Это Мегафон.') + elif re.search('\(9(?:1|8)', given): + print('Это МТС.') + elif re.search('\(96', given): + print('Это Билайн.') + else: + print('Я не могу точно сказать, какой это оператор.') + else: + print('Введённый номер не совпадает с шаблоном +7 (ХХХ) ХХХ-ХХ-ХХ.') + if re.search('\(9(?:2|3)', given) or 
re.search('\+7 ?9(?:2|3)', given) or re.match('8 ?9(?:2|3)', given): + print('Это Мегафон.') + elif re.search('\(9(?:1|8)', given) or re.search('\+7 ?9(?:1|8)', given) or re.match('8 ?9(?:1|8)', given): + print('Это МТС.') + elif re.search('\(96', given) or re.search('\+7 ?96', given) or re.match('8 ?96', given): + print('Это Билайн.') + else: + print('Я не могу точно сказать, какой это оператор.') +if __name__ == '__main__': + main() + + + + + +import re +def revert(dictionary): + reverted = {} + for key in dictionary: + reverted[dictionary[key]] = key + return reverted +def russian_to_latin_dictionary(lines): + raw = {} + rus_to_lat = {} + for i in range(len(lines)): + raw[lines[i].split(' — ')[0]] = lines[i].split(' — ')[1].strip('\n') + raw = revert(raw) + for key in raw: + if len(key.split(',')) > 1: + for i in range(len(key.split(','))): + rus_to_lat[key.split(',')[i-1].strip()] = raw[key] + i +=10 + else: + rus_to_lat[key] = raw[key] + return rus_to_lat +def latin_to_russian_dictionary(lines): + raw = {} + lat_to_rus = {} + for i in range(len(lines)): + raw[lines[i].split(' — ')[0]] = lines[i].split(' — ')[1].strip('\n') + for key in raw: + if len(key.split(',')) > 1: + for i in range(len(key.split(','))): + lat_to_rus[key.split(',')[i-1].strip()] = raw[key] + i +=10 + else: + lat_to_rus[key] = raw[key] + return lat_to_rus +def main(): + with open ('latin.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for i in range(len(lines)): + lines[i] = re.sub('(?:–|−|-)', '—', lines[i]) + lines[i] = re.sub(';', ',', lines[i]) + print(latin_to_russian_dictionary(lines)) + print(russian_to_latin_dictionary(lines)) +if __name__ == '__main__': + main() + + + +def process(fname): + with open (fname, 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.lower() + forms = text.split() + for i in range(len(forms)): + forms[i] = forms[i].strip('.,!?*()«»\'":][><') + return forms +def freqlist(forms): + freqs = {} + for i in range(len(forms)): + if 
def maxfreq(frequencies):
    """Return every key of *frequencies* that has the highest count.

    Args:
        frequencies: dict mapping a word to its number of occurrences.

    Returns:
        List of the most frequent words in the dict's iteration order;
        an empty list when *frequencies* is empty.
    """
    if not frequencies:
        return []
    # Compute the maximum once instead of on every loop iteration.
    highest = max(frequencies.values())
    return [key for key, amount in frequencies.items() if amount == highest]
import os

# Count how many files under the current directory carry each extension and
# report the most frequent one(s).
extension_frequency_list = {}
for root, dirs, files in os.walk('.'):
    for f in files:
        # splitext copes with names that have no dot at all (extension is '')
        # and with several dots ('a.tar.gz' -> 'gz').
        file_ext = os.path.splitext(f)[1].lstrip('.')
        if not file_ext:
            continue
        extension_frequency_list[file_ext] = extension_frequency_list.get(file_ext, 0) + 1

if extension_frequency_list:
    max_ext = max(extension_frequency_list.values())
    first_reported = False
    for key, amount in extension_frequency_list.items():
        if amount != max_ext:
            continue
        if not first_reported:
            print('The most frequent extention is \''+key+'\'. There is(are) '+str(amount)+' file(s) with it.')
            first_reported = True
        else:
            # Ties with the maximum are listed after the first winner.
            print('There is(are) also '+str(amount)+' \''+key+'\' file(s).')
else:
    # max() on an empty dict would raise ValueError.
    print('No files with an extension were found.')
import random


def _word_list(fname):
    """Return the non-empty, whitespace-trimmed lines of the UTF-8 file *fname*."""
    with open(fname, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


def _pick(fname):
    """Return a random entry from the word file *fname* ('' if it has none)."""
    words = _word_list(fname)
    # Blank lines are filtered out above, so a trailing newline in the file
    # can no longer produce an empty "word".
    return random.choice(words) if words else ''


def nom_noun():
    """Return a random noun in the nominative case (from nomnouns.txt)."""
    return _pick('nomnouns.txt')


def acc_noun():
    """Return a random noun in the accusative case (from accnouns.txt)."""
    return _pick('accnouns.txt')


def adverb():
    """Return a random adverb (from adverbs.txt)."""
    return _pick('adverbs.txt')


def intensifier(adv):
    """Return *adv* preceded by a random intensifier (from intensifiers.txt)."""
    return _pick('intensifiers.txt') + ' ' + adv


def verb_of_thought(subj):
    """Return a sentence '<subj> <thought verb>, что <random transitive clause>.'"""
    return subj + ' ' + _pick('thoughtverbs.txt') + ', что ' + trans_verb(nom_noun(), acc_noun()) + '.'


def trans_verb(subj, obj):
    """Return the clause '<subj> <intensified adverb> <transitive verb> <obj>'."""
    return subj + ' ' + intensifier(adverb()) + ' ' + _pick('transverbs.txt') + ' ' + obj


def trans_verb_negative(subj, obj):
    """Return a negated transitive clause.

    The particle 'не' is placed either before the intensified adverb or
    before the verb; one of the two variants is chosen at random.
    """
    negative_sentences = [
        subj + ' не ' + intensifier(adverb()) + ' ' + _pick('transverbs.txt') + ' ' + obj,
        subj + ' ' + intensifier(adverb()) + ' не ' + _pick('transverbs.txt') + ' ' + obj,
    ]
    return random.choice(negative_sentences)


def verb_of_thought_negative(subj, obj):
    """Return a sentence '<subj> не <thought verb>, что <random transitive clause>.'

    *obj* is accepted for signature compatibility but is not used: the
    embedded clause draws its own random subject and object.
    """
    return subj + ' не ' + _pick('thoughtverbs.txt') + ', что ' + trans_verb(nom_noun(), acc_noun()) + '.'
+def positive(): + positive_sentences = [trans_verb(nom_noun(), acc_noun()) + '.', verb_of_thought(nom_noun())] + return random.choice(positive_sentences) +def question(): + questions = ['зачем ' + trans_verb(nom_noun(), acc_noun()) + '?', 'почему ' + verb_of_thought(nom_noun())] + return random.choice(questions) +def negative(): + negative_sentences = [verb_of_thought_negative(nom_noun(), acc_noun()), trans_verb_negative(nom_noun(), acc_noun())] + return random.choice(negative_sentences) +def conditional(): + with open('transverbs.txt','r', encoding = 'utf-8') as f: + transverbs = f.read().split('\n') + conditional_sentences = ['если ' + positive().strip('.') + ', то ' + nom_noun() + ' ' + random.choice(transverbs)+ ' ' + acc_noun(), 'если ' + positive().strip('.') + ', то ' + nom_noun() + ' не ' + random.choice(transverbs)+ ' ' + acc_noun(), 'если ' + negative().strip('.') + ', то ' + nom_noun() + ' ' + random.choice(transverbs)+ ' ' + acc_noun(), 'если ' + negative().strip('.') + ', то ' + nom_noun() + ' не ' + random.choice(transverbs)+ ' ' + acc_noun()] + return random.choice(conditional_sentences) +def imperative(): + with open('imperatives.txt','r', encoding = 'utf-8') as f: + imperatives = f.read().split('\n') + imperative_sentences = ['пусть ' + positive(), 'пусть ' + negative(), 'пусть ' + conditional(), random.choice(imperatives) + ' ' + acc_noun()] + return random.choice(imperative_sentences) +def main(): + sentences = [positive(), question(), negative(), conditional(), imperative()] + random.shuffle(sentences) + for i in range(5): + print(sentences[i].capitalize()) +if __name__ == '__main__': + main() + + + +a = input("Введите первое число. ") +a = float(a) +b = input("Введите второе число. ") +b = float(b) +c = input("Введите третье число. 
def find_bigramm(words):
    """Report whether some pair of adjacent words occurs more than twice.

    Pairs are counted directly on the word list instead of searching the
    joined text with each pair as a regular expression. Words containing
    regex metacharacters are therefore safe, and a pair is never matched
    inside longer words.

    Args:
        words: list of word strings in text order.

    Returns:
        True if at least one bigram occurs three or more times, otherwise
        False. The same value is printed, as the script always did.
    """
    from collections import Counter

    pair_counts = Counter(zip(words, words[1:]))
    found = any(amount > 2 for amount in pair_counts.values())
    print(found)
    return found
def open_file(file_name):
    """Read a UTF-8 text file and return its normalised words.

    Each whitespace-separated token is stripped of surrounding punctuation
    and digits and lower-cased. Tokens that are empty after stripping
    (e.g. bare numbers) are dropped.

    Args:
        file_name: path of the file to read.

    Returns:
        List of normalised words in file order.
    """
    words = []
    # The context manager guarantees the file is closed.
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            for token in line.split():
                # Collect the normalised value; rebinding the loop variable
                # alone would leave the stored token unchanged.
                word = token.strip('.,!?:;()\'\"1234567890').lower()
                if word:
                    words.append(word)
    return words
def adjectives(fname):
    """Collect adjective-like word forms together with the word that follows.

    A form is treated as an adjective when it is longer than two characters
    and ends in one of the endings -ой/-ый/-ий, -ая/-яя, -ое/-ее (all
    Cyrillic letters).

    Args:
        fname: path of the text file, read through ``opentext``.

    Returns:
        List of strings '<adjective> <next word>'; for an adjective that is
        the last form of the text, just the adjective itself.
    """
    endings = ('ой', 'ый', 'ий', 'ая', 'яя', 'ое', 'ее')
    forms = opentext(fname)
    adj = []
    for i, form in enumerate(forms):
        if len(form) <= 2 or not form.endswith(endings):
            continue
        if i + 1 < len(forms):
            adj.append(form + ' ' + forms[i + 1])
        else:
            adj.append(form)
    return adj
def count_tf(word, text):
    """Return the term frequency of *word* in *text*.

    Args:
        word: the term to count.
        text: sequence of words (anything with ``count`` and ``len``).

    Returns:
        Occurrences of *word* divided by the length of *text*; 0.0 for an
        empty text, where the ratio is undefined.
    """
    if not text:
        return 0.0
    return text.count(word) / len(text)
def keywords(text, texts):
    """Score every distinct word of *text* and keep six of them.

    Args:
        text: list of words of the document being analysed.
        texts: list of all documents (each a list of words), passed on to
            ``count_tfidf``.

    Returns:
        dict mapping word -> score for the six words with the smallest
        ``count_tfidf`` values, in ascending score order. The selection
        rule (ascending, six entries) is kept as the script defined it.
    """
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    # Look the score up with dict.get; calling the dict itself raises TypeError.
    ranked = sorted(dic_tfidf, key=dic_tfidf.get)
    return {word: dic_tfidf[word] for word in ranked[:6]}
def count_pmi(x, y):
    """Return the pointwise mutual information of the bigram '<x> <y>'.

    Reads the module-level ``word_freq``/``words`` (unigram counts and the
    word list) and ``bigrams_freq``/``bigrams`` (bigram counts and the
    bigram list).

    Args:
        x: first word of the bigram.
        y: second word of the bigram.

    Returns:
        log(p(x y) / (p(x) * p(y))), or 0 when either word or the bigram
        is unseen, since the logarithm is undefined there.
    """
    p_x = word_freq[x] / len(words) if x in word_freq else 0
    # The second probability must be taken for y, not x.
    p_y = word_freq[y] / len(words) if y in word_freq else 0
    bigr = x + ' ' + y
    p_xy = bigrams_freq[bigr] / len(bigrams) if bigr in bigrams_freq else 0
    if not (p_x and p_y and p_xy):
        # Avoids both division by zero and log(0).
        return 0
    return log(p_xy / (p_x * p_y))
open_words(os.path.join(root, f)) + if 'izvest' in root: + for f in files: + corpus_izvest_words += open_words(os.path.join(root, f)) + words = corpus_anek_words + corpus_teh_words + corpus_izvest_words + freq_anek = make_freq(corpus_anek_words) + freq_izvest = make_freq(corpus_izvest_words) + freq_teh = make_freq(corpus_teh_words) + freq_all = make_freq(words) + words_cathegory_dict = {} + for w in words: + i = 0 + try: + if i < 100: + pmi_anek = calculate_pmi_cats(w, 'anek') + pmi_cats(w, 'anek') + pmi_izvest = calculate_pmi_cats(w, 'izvest') + pmi_teh = calculate_pmi_cats(w, 'teh') + pmi_max = max(pmi_anek, pmi_izvest, pmi_teh) + if pmi_max == pmi_anek: + words_cathegory_dict[w] = 'anek' + if pmi_max == pmi_teh: + words_cathegory_dict[w] = 'teh' + if pmi_max == pmi_anek: + words_cathegory_dict[w] = 'teh' + i += 1 + except KeyError: + pass + print(words_cathegory_dict) +if __name__ == '__main__': + main() + + + + +import shutil +import os +name = input('Print any sentence. ') +words = name.split() +path = words[0] +for i in range(1, len(words)): + path = os.path.join(path, words[i]) +os.makedirs(path) + + + +import shutil +import os +num = int(input('Print any natural number. ')) +for i in range(num): + name = str(i+1) + os.makedirs(name) + for a in range(i+1): + filename = os.path.join(name,str(a+1)+'.txt') + with open(filename, 'w', encoding = 'utf-8') as f: + f.write('') + +import os +import shutil +filelist = [f for f in os.listdir() if os.path.isfile(f)] +print(filelist) + +import os +import shutil +path = os.path.abspath('.') +path2 = os.getcwd() +universalpath = os.path.join('texts', '1.txt') +exists = os.path.exists('texts\1.txt') +exists2 = os.path.exists(os.path.join('texts', '1.txt')) +filelist = os.listdir(r'C:\My\HSE\programming\HSE_programming\HSE_programming\CWs\CW13\texts') +s = 'Hello! 
' +i = 1 +for f in filelist: + if f.endswith('.txt'): + with open(f, 'a', encoding = 'utf-8') as w: + w.write(s*1) + i += 1 +texts = [f for f in os.listdir(r'C:\My\HSE\programming\HSE_programming\HSE_programming\CWs\CW13\texts') if f.endswith('.txt')] +if not os.path.exists('ab'): + os.mkdir('ab') +if not os.path.exists(r'a\long\long\long\long\path'): + os.makedirs(r'a\long\long\long\long\path') +if os.path.exists('ab') and not os.path.exists('abc'): + os.rename('ab', 'abc') +if os.path.exists(r'a\long\long\long') and not os.path.exists(r'a\long\long\longer'): + os.rename(r'a\long\long\long', r'a\long\long\longer') +isfile = os.path.isfile(r'texts\1.txt') +isdir = os.path.isdir(r'a\long\long') +print(os.listdir()) +shutil.copy(r'texts\1.txt', r'newcorpus') +shutil.copytree(r'texts', r'corpus') +shutil.move(r'texts\2.txt', r'newcorpus') +os.remove(r'corpus\2.txt') +shutil.rmtree('newcorpus') +shutil.rmtree('a') +shutil.rmtree('abc') + + + + +import os +import shutil +directory = input('Print any path working in your OS. ') +if os.path.exists(directory): + filelist = [f for f in os.listdir() if os.path.isfile(f)] + extlist = [] + for f in filelist: + ext = f.split('.')[1] + if ext not in extlist: + extlist.append(ext) + extdict = {} + for ext in extlist: + for f in filelist: + if f.endswith(ext): + if ext not in extdict: + extdict[ext] = 1 + else: + extdict[ext] +=1 +else: + directory = os.getcwd() +print(extdict) + + +import re +def main(): + with open ('hse.html', 'r', encoding = 'utf-8') as f: + text = f.read() + card_reg = '
(\2-\d))' +def find(): + with open('lang.html', 'r', encoding = 'utf-8') as f: + content=f.read() + isos=re.findall(reg, content) + return isos +def save(): + isos=find() + with open('isos.txt', 'w', encoding = 'utf-8') as n: + for iso in isos: + n.write(iso[2] + '\n') +save() +import os +import re +def maxfiles (): + numfiles = 0 + name = '' + for root, dirs, files in os.walk('.'): + if len (files) > numfiles: + numfiles = len (files) + name = re.sub(r'.*/', '', root) + print ('Больше всего файлов в папке:', name) +maxfiles () +import os +import re +allobj = os.listdir('.') +lat = r'[A-Za-z]' +kir = r'[А-Яа-я]' +folders = [] +fold_new = [] +folds = allobj +def fold_num(): + for f in folds: + if os.path.isfile(f) == True: + folds.remove(f) + for fl in folds: + if re.search(lat, fl) != None and re.search(kir, fl) != None: + folders.append(fl) + print ('папок, название которых содержит и кириллические, и латинские символы:', len(folders)) +def norepeat(): + for obj in allobj: + index = obj.rfind('.') + if index != -1: + obj = obj[:index] + if fold_new.count(obj) == 0: + fold_new.append(obj) + print (obj) +fold_num() +norepeat() +import re +def text(): + with open('text.txt', 'r', encoding = 'utf-8') as f: + f = f.read() + f=f.lower() + sens = re.split('[.\?!] 
def opendict():
    """Load the clue -> answer pairs from 'dict.csv'.

    Every line is split on whitespace: the first field is the clue and the
    second is the answer. Lines with fewer than two fields (for example
    blank lines) are skipped.

    Returns:
        dict mapping each clue to its answer.
    """
    d = {}
    with open('dict.csv', 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                continue
            d[fields[0]] = fields[1]
    return d
import os

# Count the files under the current directory that have distinct names;
# a name that occurs in several folders is counted once.
count = 0
names = set()  # file names already seen; must exist before the loop uses it
for root, dirs, files in os.walk('.'):
    for f in files:
        if f not in names:
            count += 1
            names.add(f)
print('Найдено {} файла(ов):'.format(count))
def random_sentence():
    """Return a random two-clause sentence.

    The shape is '<noun> <verb> <adverb> <conjunction> <noun> <verb>
    <adverb>.'. The conjunction is drawn from either the subordinating or
    the coordinating set, with equal probability.
    """
    parts = [
        noun(),
        verb(),
        adverb(),
        # `sub_conj() or comp_conj()` always yields the first (non-empty)
        # string, so pick the generator at random instead.
        random.choice((sub_conj, comp_conj))(),
        noun(),
        verb(),
        adverb(),
    ]
    return ' '.join(parts) + '.'
open_html(r'C:\Users\Никита\Desktop\hw15 cats and dinos\dinos.html') +m = re.sub('динозавр', 'кот', content, flags= re.M) +p = re.sub('Динозавр', 'Кот', m, flags= re.M) +print(p) +with open('result.txt', 'w', encoding='utf-8') as file: + print(p, file=file) +print('результат распечатан в файл') +import re +def text_process(text_name): + f = open(text_name, 'r', encoding='utf-8') + text = f.read() + l = text.split() + l1 = [] + for word in l: + l1.append(word.strip('.,;:?![]')) + f.close() + return l1 +def regexp(text): + for word in text: + if re.search('.*[аеёиоуиыэюяАЕЁИОУИЫЭЮЯ].*[аеёиоуиыэюяАЕЁИОУИЫЭЮЯ].*[аеёиоуиыэюяАЕЁИОУИЫЭЮЯ]' , word): + print(word) +def main(): + text = text_process('text.txt') + regexp(text) +main() +word = input() +while len(word) > 0: + print('Nominative singular') + print('Accusative singular') + if word.endswith('а') or word.endswith('я'): + print('Genitive singular') + print('Accusative singular') + print('Nominative plural') + if word.endswith('у') or word.endswith('ю'): + print('Dative singular') + if word.endswith('ом') or word.endswith('ем'): + print('Instrumentalis singular') + if word.endswith('е'): + print('Prepositive singular') + if word.endswith('ы'): + print('Nominative plural') + print('Accusative plural') + if word.endswith('ов') or word.endswith('ев') or word.endswith('ой') or word.endswith('ей'): + print('Genitive plural') + print('Accusative plural') + if word.endswith('ам') or word.endswith('ям'): + print('Dative plural') + if word.endswith('ами') or word.endswith('ями'): + print('Instrumentalis plural') + if word.endswith('ах') or word.endswith('ях'): + print('Prepositive plural') + word = input() +def corpus_process(text_name): + f = open(text_name, 'r') + text = f.read() + l = [] + for line in text: + l.append(line) + f.close() + return l +def count_frequency(word, text): + n = 0 + for s in text: + if word in s: + n += 1 + return n +def lemma_dict(text): + dictionary = {} + for s in text: + if '', '', text_str) + 
text = re.sub('\n', '', text) + text_arr = text.split(' ') + return text_arr +def find_ins(text_lines): + + words_ins = [] + for line in text_lines: + if '=ins' in line: + word = re.match('(.*?)<', line).group(1) + words_ins.append(word) + return words_ins +def make_string(words_ins, text_arr): + + line_arr = [] + text_arr_e = [i.strip('&.,?:"«»;!()') for i in text_arr] + for word in words_ins: + n = text_arr_e.index(word) + left_context = [] + for i in range(n-3, n-1): + try: + left_context.append(text_arr[i]) + except Exception: + continue + right_context = [] + for i in range(n+1, n+3): + try: + right_context.append(text_arr[i]) + except Exception: + break + line = ' '.join(left_context)+'\t'+word+'\t'+' '.join(right_context) + line_arr.append(line) + string = '\n'.join(line_arr) + return string +def main(): + + n = (count_ana_word(open_xml())) + print(n) + + gr_dict = find_lex(open_xml_as_string()) + array = [i+'\t'+gr_dict[i] for i in gr_dict.keys()] + write('\n'.join(array), 'frq_gr.txt') + + write((make_string(find_ins(open_xml()), get_text(open_xml_as_string()))), 'words_ins.txt') +if __name__ == '__main__': + main() +def text_process(text_name): + f = open(text_name, 'r', encoding='utf-8') + text = f.read() + l = text.split() + l1 = [] + for word in l: + l1.append(word.strip('.,;:?!')) + f.close() + return l1 +def count(text, letter, letter2): + n = 0 + for word in text: + if word.startswith(letter) or word.startswith(letter2): + n += 1 + return n +def letters_dict(text): + dictionary = {} + alphabet = 'абвгдеёжзийклмнопрстуфхцчшщьыъэюя' + ALPHABET = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ' + for letter in alphabet: + letter2 = ALPHABET[alphabet.find(letter)] + dictionary[letter] = count(text, letter, letter2) +def main(): + text = text_process('text.txt') + dictionary = letters_dict(text) + text2 = open('text2.tsv', 'w') + for letter in dictionary: + text2.write(letter + '\t' + str(dictionary[letter])) + text2.close() +main() +import os +def 
remove_tree(folder): + for root, dirs, files in os.walk(folder): + for f in files: + os.remove(os.path.join(root, f)) + for d in dirs: + os.rmdir(os.path.join(root, d)) + os.rmdir(folder) +remove_tree('folder') +import csv +def text_process(text_name): + f = open(text_name, 'r', encoding='utf-8') + text = f.read() + l = text.split() + l1 = [] + for word in l: + l1.append(word.strip('.,;:?!')) + f.close() + return l1 +def freq_dict(text): + dictionary = {} + for word in text: + freq = 0 + for i in text: + if i == word: + freq += 1 + dictionary[word] = freq + return sorted(dictionary) +def main(): + text = text_process('text.txt') + dictionary = freq_dict(text) + text2 = open('text2.tsv', 'w') + for word in dictionary: + text2.write(word + '\t' + str(dictionary[word])) + text2.close() +main() +import os +def new_sentence(sentence): + sentence1 = '' + for word in sentence: + sentence1 += word.strip('.,;:?!') + ' ' + sentence1 += '.' + return sentence1 +def text_process(text_name): + f = open(text_name, 'r', encoding='utf-8') + text = f.read() + text = text.replace('!','.') + text = text.replace('?', '.') + text = text.replace('...','.') + l = text.split(.) 
+ l1 = [new_sentence(sentence) for sentence in text] + f.close() + return l1 +def count_longest(text): + n = 0 + longest = [] + for sent in text: + sent1 = sent.split(' ') + if len(sent1) > n: + longest = sent1 + n = len(sent1) + return longest +def create_folders(sent): + sent = '/'.join(sent) + os.makedirs(sent) +def main(): + text = text_process('text.txt'): + sent = count_longest(text) + create_folders(sent) +main() +import random +number = random.randint(0,9) +guess = int(input()) +while guess != number: + print('No') + if guess < number: + print ('больше') + else: + print ('меньше') + guess = int(input()) +print('Yes') +def text_process(text_name): + f = open(text_name, 'r', encoding='utf-8') + text = f.read() + l = text.split() + l1 = [] + for word in l: + l1.append(word.strip('.,;:?!')) + f.close() + return l1 +def position_dict(text): + dictionary = {} + for word in text: + dictionary[word] == text.index[word] + return(dictionary) +def main(): + text = text_process('text.txt') + dictionary = position_dict(text) + s = 'Слово {} находится на месте номер {} \n' + text2 = open('text2.txt', 'w') + for word in dictionary: + text2.write(s.format(word, str(dictionary[word]))) + text2.close() +main() +import re +def xml_process(text_name): + f = open(text_name, 'r', encoding='utf-8') + text = f.read() + l = text.split('\n') + l1 = [] + for tag in l: + l1.append(tag) + f.close() + return l1 +def ana_word(xml): + anas = [] + for tag in xml: + if tag startswith.(''): + anas.append(tag.count('') + ' ' + for word in xml[(xml.index(tag))]: + s += '\t' + word.strip('<.*>') + '\t' + for word in xml[(xml.index(tag) + 1),(xml.index(tag) + 3)]: + s += word.strip('<.*>') + ' ' + s += '\n' + return s + def main(): + xml = text_process(text.xml) + print(ana_word(xml)) + dictionary = dict_parts_of_speech(xml) + d = open('dict.txt', 'w') + for i in dictionary: + d.write(i + '\t' + dictionary[i] + '\n') + ins = open('ins.txt', 'w') + for tag in xml: + ins.write(instrumentalis(tag)) 
+ d.close() + ins.close() +main() +import re +def corpus_process(text_name): + f = open(text_name, 'r') + text = f.read() + l = [] + for line in text: + l.append(line) + f.close() + return l +def count_frequency(word, text): + n = 0 + for s in text: + if word in s: + n += 1 + return n +def count_adj(text): + dictionary = {} + for s in text: + if re.search('type=l.f.*', text) != None: + wordtype = s + number = wordtype.find('type=') + wordtype = wordtype[number, len(s)] + wordtype.replace('type="', '') + number2 = wordtype.find('"') + wordtype = wordtype[0, number] + wordtype = '"' + wordtype + n = count_frequency(wordtype, text) + dictionary[wordtype] = n + return dictionary +def write_in_file(corpus): + dictionary = count_adj(corpus) + adjectives = open('adjectives.txt', 'w') + for i in dictionary: + adjectives.write(i + ' ' + dictionary[i] + '\n') + adjectives.close() +def main(): + corpus = corpus.process('corpus.xml') + write_in_file(corpus) + corpus1 = corpus + n1 = corpus1.index('') + n2 = corpus1.index('') + for s in corpus1[n1 + 1, n2 - 1]: + s = re.sub('', ', ', s) + s = re.sub('', '', s) + corpus1_file = open(corpus1.txt, 'w') + for i in corpus1: + corpus1_file.write(i + '\n') + corpus1_file.close() +main() +import os +def create_letters_list(): + file_tree = os.walk('.') + letters = {} + for d in file_tree: + folder_name = d[0].strip('.\/') + letter = folder_name[0] + if letter in letters: + letters[letter] += 1 + else: + letters[letter] = 1 + return letters +def main(): + letters = create_letters_list + letter = '' + n = 0 + for i in letters: + if letters[i] > n: + letter = i + n = letters[i] + print(letter) +main() +import re +import os +import csv +def open_file(xml): + with open(xml, 'r', encoding = 'cp1251') as f: + text = f.readlines() + return text +def open_file_as_string(xml): + with open(xml, 'r', encoding = 'cp1251') as f: + text = f.read() + return text +def count_words(text): + text_as_string = open_file_as_string(text) + return 
str(text_as_string.count('')) +def find_author(text): + text_as_string = open_file_as_string(text) + author = re.search('') + author = auth.lstrip('') + return author +def find_created(text): + text_as_string = open_file_as_string(text) + created = re.search('') + created = auth.lstrip('') + return created +def main(): + filetree = os.walk('news') + task1 = open('task1.txt', 'w', encoding = 'cp1251') + for root, dirs, files in filetree: + for f in files: + task1.write(f + '\t' + count_words(f) + '\n') + task1.close() + task2 = open('task2.csv', 'w', encoding = 'cp1251') + writer = csv.writer(task2.csv, delimiter = '|', quotechar='|', quoting=csv.QUOTE_MINIMAL) + for root, dirs, files in filetree: + for f in files: + f.writerow([f] + [find_author(f)] + [find_created(f)]) +if __name__ == '__main__': + main() +import re +import os +import csv +def open_file(xml): + with open(xml, 'r', encoding = 'cp1251') as f: + text = f.readlines() + return text +def open_file_as_string(xml): + with open(xml, 'r', encoding = 'cp1251') as f: + text = f.read() + return text +def count_words(text): + text_as_string = open_file_as_string(text) + return str(text_as_string.count('')) +def find_author(text): + text_as_string = open_file_as_string(text) + author = re.search('') + author = auth.lstrip('') + return author +def find_created(text): + text_as_string = open_file_as_string(text) + created = re.search('') + created = auth.lstrip('') + return created +def main(): + filetree = os.walk('news') + task1 = open('task1.txt', 'w', encoding = 'cp1251') + for root, dirs, files in filetree: + for f in files: + task1.write(f + '\t' + count_words(f) + '\n') + task1.close() + task2 = open('task2.csv', 'w', encoding = 'cp1251') + writer = csv.writer(task2.csv, delimiter = '|', quotechar='|', quoting=csv.QUOTE_MINIMAL) + for root, dirs, files in filetree: + for f in files: + f.writerow([f] + [find_author(f)] + [find_created(f)]) +if __name__ == '__main__': + main() +import os +filetree = 
os.walk('news') +for root, dirs, files in filetree: + for f in files: + print(f) +def new_sentence(sentence): + sentence1 = '' + for word in sentence: + sentence1 += word.strip('.,;:?!') + ' ' + sentence1 += '.' + return sentence1 +def text_process(text_name): + f = open(text_name, 'r', encoding='utf-8') + text = f.read() + text = text.replace('!','.') + text = text.replace('?', '.') + text = text.replace('...','.') + l = text.split(.) + l1 = [new_sentence(sentence) for sentence in text] + f.close() + return l1 +def create_dict(text): + dictionary = {sentence: {word: len(word) for word in sentence} for sentence in text} +def main(): + text = text_process('text.txt') + return(create_dict(text)) +main() +import random +n = open('nouns.txt', 'r') +nouns = [line.strip() for line in n] +v = open('verbs.txt', 'r') +verbs = [line.strip() for line in v] +c = open('clitics.txt', 'r') +clitics = [line.strip() for line in c] +n2 = open('nouns2.txt', 'r') +nouns2 = [line.strip() for line in n2] +p = open('marks.txt', 'r') +punctuation = [line.strip() for line in p] +i = open('imperatives.txt', 'r') +imperative = [line.strip() for line in i] +def verse1: + return (random.choice(nouns)+ ' ' + random.choice(verbs) + ' ' + random.choice(nouns) + ' ' + random.choice(punctuation)) +def verse2: + return(random.choice(imperative) + ' ' + random.choice(nouns) + ' ' + random.choice(clitics) + ' ' + random.choice(nouns2) + ' ' + random.choice(punctiation)) seq)) +def verse3: + return (random.choice(clitics) + ' ' + random.choice(nouns2) + ' ' + random.choice(verbs) + ' ' + random.choice(nouns) + ' ' + random.choice(punctuation)) +def make_verse: + verse = random.choice([1,2,3]) + if verse == 1: + return verse1() + elif verse == 2: + return verse2() + else: + return verse3() +for n in range(4): + print(make_verse)import os +def symbols(s): + ans = True + for i in s: + if i not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz': + ans = False + return ans +def main(): + n = 0 + for 
f in os.listdir('.'): + if symbols(f) = True: + n += 1 + print (f) + print (n) +main() +s = input() +l = [] +while s != '': + if len(s) > 5: + l.append(s) + s = input() +for i in(l): + print(i)s = input() +for i in range(0, len(s) + 1): + print(s[0:i])def text_process(text_name): + f = open(text_name, 'r', encoding='utf-8') + text = f.read() + l = text.split() + l1 = [] + for word in l: + l1.append(word.strip('.,;:?![]{}')) + f.close() + return l1 +def count_ness(text): + list_ness = [] + for word in text: + if word.endswith(ness): + list_ness.append(word) + return list_ness +def frequency(word, text): + n = 0 + for i in text: + if i == word: + n += 1 + return n +def main(): + text = text_process('text.txt') + words = {} + for word in count_ness(text): + words[word] = frequency(word, text) + frequencies = word.values() + print(len(count_ness(text))) + print(max(frequencies)) +main()import re +def count_line(): + with open('Test.xml', 'r', encoding = 'utf-8') as f: + s = 1 + for line in f: + if line != ' \n': + s += 1 + else: + break + return s +def write_in(): + with open('Test.txt', 'w', encoding = 'utf-8') as f: + num = count_line() + f.write(str(num)) + return +write_in() +def open_text(): + with open('Test.xml', 'r', encoding = 'utf-8') as f: + text = f.read() + return text +def phrase(): + text = open_text() + d = {} + reg = re.findall(r'.*?',text) + for i in range(len(reg)): + if reg[i] not in d: + d[reg[i]] = 1 + else: + d[reg[i]] +=1 + return d +def write_phrase(): + with open('Test1.txt', 'w', encoding = 'utf-8') as f: + d = phrase() + for key in d: + f.write(key + ',' + str(d[key])+ '\n') + return +write_phrase() +def n(): + text = open_text() + reg = re.findall(r'(.*?)',text) + return reg +print(n()) +word = input('Введите слово') +for i in range(len(word)): + print(word[i::] + word [:i]) +print('Введите число') +a=float(input()) +print('Введите число') +b=float (input()) +print('Введите число') +c=float(input()) +if a%b==c: + print ('a даёт остаток c 
при делении на b') +else: + print('a не даёт остаток c при делении на b') +if a*c+b==0: + print ('c является решением линейного уравнения ax + b = 0') +else: + print('c не является решением линейного уравнения ax + b = 0') +import os +def files(): + dic={} + for root, dirs, files in os.walk('.'): + for f in files: + f = f[f.rfind('.')+1:] + if f not in dic: + dic[f] = 1 + else: + dic[f]+=1 + for key in dic: + if dic[key] == max(dic.values()): + return key +print(files()) +import random +def noun_f(): + file = open ('Существительные_ж.txt' , 'r', encoding = 'utf-8') + for line in file: + noun = line.split() + file.close() + return random.choice(noun) +def noun_m(): + file = open ('Существительные_м.txt' , 'r', encoding = 'utf-8') + for line in file: + nouns = line.split() + file.close() + return random.choice(nouns) +def noun_number_of(): + file = open ('Существительные_множественные.txt' , 'r', encoding = 'utf-8') + for line in file: + nouns = line.split() + file.close() + return random.choice(nouns) +def adjective_m(word): + file = open ('Прилагательные_м.txt' , 'r', encoding = 'utf-8') + for line in file: + adjectives = line.split() + file.close() + return random.choice(adjectives) + ' ' + word +def adverb(): + file = open ('Наречия.txt' , 'r', encoding = 'utf-8') + for line in file: + adverbs = line.split() + file.close() + return random.choice(adverbs) +def verb_f(subj): + file = open ('Глаголы_ж.txt' , 'r', encoding = 'utf-8') + for line in file: + verbs = line.split() + file.close() + return random.choice(verbs) + ' ' + subj +def verb_m(adv,n): + file = open ('Глаголы_м.txt' , 'r', encoding = 'utf-8') + for line in file: + verbs = line.split() + file.close() + return adv + ' ' + n+ random.choice(verbs)+ ' ' +def verb_inf(): + file = open ('Глаголы_инф.txt' , 'r', encoding = 'utf-8') + for line in file: + verbs = line.split() + file.close() + return random.choice(verbs) +def verb_transitive(obj): + file = open ('Глаголы_переход.txt' , 'r', encoding = 'utf-8') 
+ for line in file: + verbs = line.split() + file.close() + return ', который ' + random.choice(verbs) + ' ' + obj +def verb_imp(): + file = open ('Глаголы_пов.txt' , 'r', encoding = 'utf-8') + for line in file: + verbs = line.split() + file.close() + return random.choice(verbs) +def time(): + file = open ('Время.txt' , 'r', encoding = 'utf-8') + for line in file: + time = line.split() + file.close() + return random.choice(time) +def pronoun(): + file = open ('Местоимения.txt' , 'r', encoding = 'utf-8') + for line in file: + pronouns = line.split() + file.close() + return random.choice(pronouns) +def no(): + no = [ 'не ', ''] + return random.choice(no) +def random_sentence1(): + sentence = 'Иди и ' + verb_imp() + ' мне ' + noun_m()+'а' + '!' + return sentence +def random_sentence2(): + sentence = adjective_m(noun_m()) + verb_transitive(noun_number_of())+ ',' +\ + verb_m(adverb(), no()) + verb_inf() + '.' + return sentence +def random_sentence3(): + sentence = 'Где ' + time() + ' ' + verb_f(noun_f()) + '?' + return sentence +def random_sentence4(): + sentence = 'Если б ' + pronoun() + ' был ' + noun_m()+ ', то ' +\ + verb_m(adverb(), no())+ ' бы ' + verb_inf() + '.' 
+ return sentence +def random_text(): + sentences = [random_sentence1(), random_sentence2(), random_sentence3(), random_sentence4()] + return random.choice(sentences) +print("---- FASCINATING MASTERPIECE STARTS HERE ----") +num_of_sents = 5 +for i in range(num_of_sents): + sentence = random_text() + sentence = sentence.capitalize() + print(sentence, end=' ') +print("\n---------AND ENDS HERE ---------") +print ('Введите слово') +word = input() +for letter in word[::-1]: + if letter not in 'з,я': + print (letter) + if letter in 'з,я': + continue + print (letter) +import re +import os +def text_read(): + for root, dirs, files in os.walk('.'): + for f in files: + if f.endswith('.xml'): + with open( f, 'r', encoding = 'utf - 8') as text: + text = text.read() + return text +def count(): + text = text_read() + reg1 = re.findall(r'.*', text) + num = len(reg1)/len(reg2) + return num +print(count()) +def part_of_speech(): + text = text_read() + dic = {} + reg = re.findall(r'gr="([A-Z]*)', text) + for i in reg: + if i not in dic: + dic[i] = 1 + else: + dic[i]+=1 + return dic +print(part_of_speech()) +def write_in(): + with open('Test1.txt', 'w', encoding = 'utf-8') as f: + d = part_of_speech() + template = '{}{:>10}' + for key in sorted(d): + f.write((template.format(key, d[key]))+ '\n') + return + +def write(): + with open('Test1.txt', 'w', encoding = 'utf-8') as f: + d = part_of_speech() + for key in sorted(d): + f.write(key+'\t'+str(d[key])+ '\n') + return +write() +import re +def open_text(): + with open('Programming.txt', 'r', encoding = 'utf - 8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for i, w in enumerate(arr): + arr[i] = arr[i].strip(',.?!-') + return arr +def prog(): + arr = open_text() + regex = 
r'\bпрограммир(ова(ть(ся)?|нн(ым|о(е|го|му?))|вш(ая|ую|и(е|й|ми?|х)|е(й|е|му?|го))(ся)?|в|л([иа]?(сь)?)|(ся)?)|у((я(сь)?|ем(о(е|го|й|му?)|ы(е|й|х|ми?)|ая|ую)|ю(щ(ая|ую|и(е|й|х|ми?)|е(го|й|му?))(ся)?))|ют(ся)?|е((шь|т|ем)(ся)?)|ю(сь)?|ете(сь)?))\b' + arr1 = [] + for i in range(len(arr)): + m = re.search(regex,arr[i]) + if m != None: + if arr[i] in arr1: + pass + else: + arr1.append(arr[i]) + return ', '.join(map(str,arr1)) +print(prog()) +import re +def open_s(): + with open ('Высшая школа экономики — Википедия.html','r', encoding = 'utf - 8')as f: + content = f.read() + links = r'Преподаватели
' + if re.search(card_reg, text): + card = re.search(card_reg, text).group() + t_reg = 'Преподаватели(?:.|\n)*?

(.+?)<' + if re.search(t_reg, card): + profs = re.search(t_reg, card).group(1) + with open ('data about teachers.txt', 'a', encoding = 'utf-8') as f: + f.write(profs) + else: + print('No data about the nuber of professors found!') + with open ('data about teachers.txt', 'a', encoding = 'utf-8') as f: + f.write('No data about the nuber of professors found!') + else: + print('No card found in this article!') + with open ('data about teachers.txt', 'a', encoding = 'utf-8') as f: + f.write('No card found in this article!') +if __name__ == '__main__': + main() +import os +import re +def tagsaway(sentence): + s = '' + for word in sentence: + word = re.sub(u'<.+?>', u'', word) + s = s + word + ' ' + return s +def get_bigramms(text): + bi = [] + text = text.split('') + for i, word in enumerate(text): + if 'gr="A=' and 'gen' in word: + if i+1 < len(text): + w = text[i+1] + if 'gr="S,' and 'gen' in w: + result1 = re.search('(.+?)', word) + result2 = re.search('(.+?)', w) + bi.append([result1.group(1), result2.group(1), tagsaway(text)]) + return bi +def newfile(arr): + f = open('bigramms.txt', 'w', encoding = 'utf8') + s = '' + for i in arr: + s = i[0] + '\t' + i[1] + '\t' + i[2] + '\n' + f.write(s) + s = '' + f.close +def filework(): + folder = 'news' + for file in os.listdir(folder): + with open(os.path.join(folder, file)) as text: + text = text.read().split('') + for se in text: + newfile(get_bigramms(se)) +def main (): + filework() +main() + + +import os +import re +def get_author (text): + for word in text: + if 'name="author"' in word: + result = re.search('content="(.+?)"', word) + return result.group(1) +def get_day (text): + for word in text: + if 'name="created"' in word: + result = re.search('content="(.+?)"', word) + return result.group(1) +def file_inf(): + ff = [] + folder = 'news' + for file in os.listdir(folder): + with open(os.path.join(folder, file)) as text: + text = text.read().split('<') + ff.append([file, get_author(text), get_day(text)]) + return ff 
+def newfile(arr): + f = open('files_info.csv', 'w', encoding = 'utf8') + f.write('Название файла;Автор;Дата создания текста\n') + s = '' + for i in arr: + s = i[0] + ';' + i[1] + ';' + i[2] + '\n' + f.write(s) + s = '' + f.close +def main (): + ff = file_inf() + newfile(ff) +main() +import os +import re +def files(): + ff = {} + folder = 'news' + for file in os.listdir(folder): + with open(os.path.join(folder, file)) as text: + words = re.findall('', text.read()) + ff[file] = len(words) + return ff +def newfile(dic): + f = open('words_in_files.txt', 'w', encoding = 'utf8') + s = '' + for k in dic: + s = k + '\t' + str(dic[k]) + '\n' + f.write(s) + s = '' + f.close +def main (): + ff = files() + newfile(ff) +main() +import os +def findanddel (folder): + for root, dirs, files in os.walk(folder, topdown = False): + for f in files: + os.remove(os.path.join(root, f)) + for d in dirs: + os.remove(os.path.join(root, d)) +def main (): + folder = input() + findanddel (folder) +main () +import os +def draw (): + for root, dirs, files in os.walk ('.'): + for d in dirs: + print ('\t'*root.count('\\'), '--',d) + for f in files: + print ('\t'*root.count('\\'), f) +def main (): + draw() +main () +def pointsaway (file): + file = file.split() + for i, word in enumerate (file): + file[i] = file[i].strip('.,?!()*&^%$ + file[i] = file[i].lower() + return file +def words (file): + slova = {} + for word in file: + if word in slova: + slova[word] += 1 + else: + slova[word] = 1 + return slova +def creation (dic): + f = open ('file.tsv', 'w', encoding = 'utf8') + arr = [] + for k in dic: + arr.append(k) + arr.sort() + for i in arr: + f.write(i + '\t' + str(dic[i]) + '\n') + f.close() +def main (): + f = open ('file.txt', 'r', encoding = 'utf8') + file = f.read() + f.close() + text = pointsaway (file) + semua = words (text) + creation (semua) +main () +def pointsaway (file): + file = file.split() + for i, word in enumerate (file): + file[i] = file[i].strip('.,?!()*&^%$ + file[i] = 
file[i].lower() + return file +def creation (text): + f = open ('new.txt', 'w', encoding = 'utf8') + dic = {text[x]: x for x in range(0, len(text))} + arr = [k for k in dic] + arr.sort() + for i in arr: + f.write('{}\t{}\n'.format(i, str(dic[i]))) + f.close() +def main (): + f = open ('file.txt', 'r', encoding = 'utf8') + file = f.read() + f.close() + text = pointsaway (file) + creation (text) +main () +import re +def get_word (word): + result = re.search('.*?(\w+)', word) + if result: + return result.group(1) + else: + return None +def find_ins (text): + inst = {} + for i, word in enumerate(text): + if 'gr="S' in word: + if 'ins' in word: + inst[i]=word + return inst +def newfile (words, text): + f = open ('ins.txt', 'w', encoding = 'utf8') + s = '' + for k in words: + i = 0 + j = 1 + while i<3: + if get_word(text[k-j]) != None: + s = get_word(text[k-j])+ ' ' + s + i += 1 + j += 1 + else: + j += 1 + s = s + '\t' + get_word(words[k]) + '\t' + i = 0 + j = 1 + while i<3: + if get_word(text[k+j]) != None: + s = s + ' ' + get_word(text[k+j]) + i += 1 + j += 1 + else: + j +=1 + f.write(s) + f.close +def main (): + f = open ('/home/woods/Загрузки/text.xml', 'r', encoding = 'utf8') + file = f.read() + text = file.split('\n') + f.close() + ss = find_ins(text) + newfile (ss, text) +main () +import re +def find_and_count (file): + pos = {} + for word in file: + word = word.split('<') + for part in word: + result = re.search('.*?gr="(\w+)', part) + if result: + print (result.group(1)) + if result.group(1) not in pos: + pos[result.group(1)] = 1 + else: + pos[result.group(1)] += 1 + return pos +def newfile (dic): + f = open('pos.txt', 'w', encoding = 'utf8') + s = '' + for k in dic: + s = s + k + '\t' + str(dic[k]) + '\n' + f.write(s) + f.close +def main (): + f = open ('/home/woods/Загрузки/text.xml', 'r', encoding = 'utf8') + file = f.read() + text = file.split('\n') + f.close() + pos = find_and_count(text) + newfile (pos) +main () +import re + +def find_w (file): + words = 
re.findall('', file) + n = len(words) + return n +def find_ana (file): + anas = re.findall('(.+?)', line) + if a: + if a.group(2) not in types: + types[a.group(2)] = 0 + return types +def countthem (file, types): + words = [] + sum = 0 + for key in types: + words.append(key) + for el in words: + for line in file: + if '"'+el+'"' in line: + sum += 1 + types[el] = sum + sum = 0 + return types +def newfile (types): + s = '' + f = open('adj.txt', 'w', encoding = 'utf8') + for key in types: + s = s + key + '-' + str(types[key]) + '\n' + f.write(s) + f.close() +def main(): + text = filework() + dic = findthem(text) + dic = countthem (text, dic) + newfile(dic) +main() +import random +file=open ('file_6.6.txt', 'r') +def noun(): + nouns=[] + for line in file: + if ' n ' in line: + line=line.split(' ') + nouns.append(line[0]) + file.seek(0, 0) + return random.choice(nouns) +def pronoun(): + pronouns=[] + for line in file: + if ' pn ' in line: + line=line.split(' ') + pronouns.append(line[0]) + file.seek(0, 0) + return random.choice(pronouns) +def verb(): + verbs=[] + for line in file: + if ' v ' in line: + line=line.split(' ') + verbs.append(line[0]) + file.seek(0, 0) + return random.choice(verbs) +def adjective (): + adjectives=[] + for line in file: + if ' adj ' in line: + line=line.split(' ') + adjectives.append(line[0]) + file.seek(0, 0) + return random.choice(adjectives) +def suborob (n, adj, pn): + x=random.randint(0,1) + if x==0: + return pn + else: + y=random.randint(0,1) + if y==0: + return n+' '+adj + else: + return n+' '+pn +def declarative (subj, v, obj): + return subj.capitalize()+' '+v+' '+obj+'.' +def question (subj, v): + x=random.randint (0, 1) + if x==0: + return 'Apa'+' '+subj+' '+v+'?' + else: + return 'Siapa'+' '+v+'?' +def negative (subj, v, obj): + x=random.randint(0, 1) + if x==0: + return subj.capitalize()+' tidak '+v+' '+obj+'.' + else: + return subj.capitalize()+' bukan '+obj+'.' 
+def imperative (v, obj): + x=random.randint(0,1) + if x==0: + return v.capitalize()+' '+obj+'!' + else: + return 'Jangan '+v+' '+obj+'!' +def conditional (subj1, v1, obj1, subj2, v2, obj2): + return 'Kalau '+subj1+' '+v1+' '+obj1+', '+subj2+' '+v2+' '+obj2+'.' +def sequence (): + a=[1, 2, 3, 4, 5] + b=[] + for i in range (5): + x=random.choice(a) + while x in b: + x=random.choice(a) + b.append(x) + return b +def text(): + seq=sequence() + for i in range (5): + if seq[i]==1: + print(declarative(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()))) + elif seq[i]==2: + print (question(suborob(noun(), adjective(), pronoun()), verb())) + elif seq[i]==3: + print (negative(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()))) + elif seq[i]==4: + print (imperative(verb(), suborob(noun(), adjective(), pronoun()))) + else: + print (conditional(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()), suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()))) +text() +file.close() +import re +def search (text): + otr = re.search ('

\n\n\n', text) + if otr: + result = otr.group(3) + return result +def main (): + f = open('file_10.6.html', 'r', encoding = 'utf8') + file = f.read() + f.close() + ans = search (file) + print (ans) +main() +def pointsaway (file): + file = file.replace('?!', '.') + file = file.split('.') + for i, word in enumerate (file): + file[i] = file[i].replace('.,?!()*&^%$ + file[i] = file[i].replace('-- ', ' ') + file[i] = file[i].lower() + return file +def tenplus (text): + for sentence in text: + sentence = sentence.split() + n=0 + s=0 + for word in sentence: + word = word.strip('.,?!()*&^%$ + s+=len(word) + n+=1 + if n>10: + print ("Это предложение со словами длины %s"%(str(round(s/n, 1)))) +def main (): + f = open ("file_12.6.txt", "r", encoding = "utf8") + file = f.read() + f.close() + text = pointsaway (file) + tenplus (text) +main () +import random +def intothedic (file): + dic={} + for line in file: + line = line.split(';') + for j, word in enumerate(line): + line[j] = word.strip('\n') + dic[line[0]] = line[1] + return dic +def youchoose (dic): + keys = [] + for key in dic: + keys.append(key) + return random.choice(keys) +def thegameison (noun, dic): + for key in dic: + if key == noun: + hint = dic[key] + n=key + break + print (hint, '...') + for i in range (3): + if input() == n: + print ('Победа!') + break + else: + if i == 0: + print ('Ещё 2 попытки') + continue + elif i == 1: + print ('Ещё 1 попытка') + continue + else: + print ('GAME OVER') +f = open('file_8.6.csv', 'r', encoding = 'utf8') +file = f.readlines() +f.close() +words = intothedic(file) +word = youchoose(words) +thegameison(word, words) +import re +def pointsaway (file): + file = file.split() + for i, word in enumerate (file): + file[i] = file[i].strip('.,?!()*&^%$ + file[i] = file[i].lower() + return file +def findverbs (file): + verbs = [] + for word in file: + if re.search ('загру(з(и.*|ят.*)|ж(у.*|ен.*))', word) != None: + if word not in verbs: + verbs.append(word) + return verbs +f = open 
('file_9.6.txt', 'r', encoding = 'utf8') +file = f.read() +f.close() +file = pointsaway(file) +verbs = findverbs(file) +print (verbs) +def filework (): + f = open('corpus.txt', 'r', encoding = 'utf8') + file = f.readlines() + f.close() + return file +def newfile (text): + f = open('lines.txt', 'w', encoding = 'utf8') + f.write(str(len(text))) + f.close +def main(): + text = filework() + newfile(text) +main() +import os +def names(array): + names = [] + for name in array: + if os.path.isfile(name): + names.append(name) + return names +def haspoints(array): + points = ',!_-' + s = 0 + su = 0 + for name in array: + for c in name: + if c in points: + s += 1 + if s > 0: + su += 1 + s = 0 + print ("Знаки препинания есть в названии такого количества файлов: ", su) +def main(): + files = names (os.listdir('.')) + print (os.listdir('.')) + haspoints (files) +main() +a=input ('Введите число ') +for i in range (1, 11): + print (i, '*', a, '=', i*int(a)) +xs=[] +for i in range (7): + xs.append(int(input())) +for i in range (7): + if xs[i]>0: + for j in range (xs[i]): + print ('x', end='') + print ('\n') + else: + print ('\n') +word=input() +l=len(word) +while l>0: + word=list(word) + x=word.pop(0) + print (''.join(word)) + l=len(word) +def pointsaway (file): + file = file.split() + for i, word in enumerate (file): + file[i] = file[i].strip('.,?!()*&^%$ + file[i] = file[i].lower() + return file +def findomni (file): + omni = [] + for word in file: + if word.startswith('omni'): + w = word.replace('omni', '') + omni.append([word, w]) + return omni +def findwords (array, file): + n = 0 + m = 0 + for i, k in enumerate (array): + for word in file: + if word == array[i][0]: + n += 1 + if word == array[i][1]: + m += 1 + print (array[i][0], n, '-', array[i][1], m) + n = 0 + m = 0 +def main (): + name = input('Введите имя файла ') + f = open (name, "r") + file = f.read() + f.close() + file = pointsaway (file) + findwords(findomni(file), file) +main() +import re +def filework(): + f = 
open('corpus.txt', 'r', encoding = 'utf8') + file = f.readlines() + f.close() + return file +def findthem (file): + types = {} + for line in file: + a = re.search('(.+?)', line) + if a: + if a.group(2) not in types: + types[a.group(2)] = 0 + return types +def countthem (file, types): + words = [] + sum = 0 + for key in types: + words.append(key) + for el in words: + for line in file: + if '"'+el+'"' in line: + sum += 1 + types[el] = sum + sum = 0 + return types +def newfile (types): + s = '' + f = open('types.txt', 'w', encoding = 'utf8') + for key in types: + s = s + key + '\n' + f.write(s) + f.close() +def main(): + text = filework() + dic = findthem(text) + dic = countthem (text, dic) + newfile(dic) +main() +capital='АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ' +s=0 +cap=0 +f=open('file_5.6.txt', 'r', encoding='utf8') +file=f.readlines() +l=len(file) +for i in range (0, l): + file[i]=file[i].split(' ') +for i in range (0, l): + m=len(file[i]) + for j in range (0, m): + if file[i][j][0] in capital: + cap+=1 + s+=1 +print ((cap/s)*100) +f.close() +import xml.etree.ElementTree as a +from os import walk +def sent(filename): + tree = a.parse('./news/'+file) + root = tree.getroot() + tmp = root.findall('.//se') + return(len(tmp)) +def move(res,filename): + res_file = open(filename, 'w') + for item in res: + res_file.write(item+'\n') +f = [] +words = [] +p = './news'; +for (dirpath, dirnames, filenames) in walk(p): + f.extend(filenames) + break +for file in f: + words.append(file+'\t'+str(sent(file))) +move(words,'counted.txt') +n=[] +while True: + word = input ('word:') + if word ==(''):break + elif word.endswith ('tur'): + n.append(word) + print ('/n'. 
join(n)) +d={'Germany':'Berlin','France':'Paris', + 'USA':'Washington DC', + 'Russia':'Moscow'} +for key in d: + print (key+"*"+d[key]) +def capital(a): + a=input("Country:") + if a in d: + return (d[a]) + else: print ("oops") +def revert (): + k={} + countries=d.keys() + for key in countries: + k[d[key]]=key + return (k) +a=revert() +print(a) + +with open ('fr.txt', 'r', encoding = 'utf-8') as a: + text=a.readlines() + for line in text: + if 'союз' in line: + print (line) +n = input("WORD: ") +m = len(n)//2 +print (n[:m], n[:m-1:(-1)]) + + + + + + + + + + + + + + + + + + + + + + + + + + +import os +import shutil +name=input ('напишите што-нибудь') +f_name=name.replace(' ', '\\') +os.makedirs(f_name) + + + + + +n = input ('word:') +for i in range (len(n)): + print(n [-i-1::]) + + + + + + + + + + + + + + + + + + + + + + + +import re +def open_html('xenokeryx.html'): + with open ('xenokeryx.html', 'r', encoding='utf-8') as f: + content=f.read() + return content +def find_links (content): + reg=r'(.*?)' + links=re.findall (reg, content) + return links +text=open_html ('xenokeryx.html') +links=find_links(text) +for link in links [:20]: + print (link[1], '-->', link[0]) +a=open (input(), 'r', encoding='utf-8') +b=0 +c=0 +for line in a: + arr=line.split() + b=b+len(arr) + for d in arr: + if len(d)>10: + c=c+1 + a.close() + e=c/b*100 + print (e, '%') + + + + + + + + + + + + + + + + + + +import os +def sup(): + for root,dirs,files in os.walk('.'): + num=root.count('\\') + root+ root.split('\\')[-1] + print ('\t'*(num), root, sep='--') + for f in files: + print ('\t'*(num+1), f) +sup() +def opentext (file.txt): + forms = [] + with open (file.txt, 'r', encoding='utf-8') as a: + text=a.read() + forms=text.split() + for i in range(len(forms)): + forms[i]=forms[i].strip(.,?!:;()) + return forms +def word (): + a=opentext(file.txt) + b=[] + for i in range (len(a)): + if a[i][-1]=='s': + if a[i][-2]=='u': + if a[i][-3]=='o': + b.append(a[i]) + print (b) + c=b.split() + 
d=str.count(c) + return d +import random +def noun (): + file=open('Mnoun.txt', 'r', encoding='utf-8') + f=readlines() + nouns=[] + for line in f: + nouns.append(line.split(" ")) + return random.choise(nouns) +def verb (): + file=open ('verb1.txt', 'r', encoding='utf-8') + f=readlines() + verbs=[] + for line in f: + verbs.append(line.split(" ")) + return random.choise(verbs) +def adj (): + file=open ('adj.txt', 'r', encoding='utf-8') + f=readlines() + adjectives=[] + for line in f: + adjectives.append(line.split(" ")) + return random.choise(adjectives) +def noun2 (): + file=open ('noun2.txt','r', encoding='utf-8') + f=readlines() + plnouns=[] + for line in f: + plnouns.append(line.split(" ")) + return random.choise(plnouns) +def conj(): + conjs=["и", "или", "но", "да", "однако", "зато", "когда", "пока", "потому что", "чтобы", "то есть"] + return "," + random.choise(conjs) +def noun3 (): + file=open ('noun3.txt', 'r', encoding='utf-8') + f=readlines () + fnouns=[] + for line in f: + fnouns.append (line.split(" ")) + return random.choise(fnouns) +def 2verb (): + file=open ('2verb.txt', 'r', encoding ='utf-8') + f=readlines () + 2verbs=[] + for line in f: + 2verbs.append (line.split(" ")) + return random.choise (2verbs) +def sen (): + return (noun+" "+verb+" "+adj+" "+noun2+" "+conj+" "+noun3+" "+2verb+"." 
+for i in range(5): + print (sen()) +import xml.etree.ElementTree as a +from os import walk +def sent(filename): + tree = a.parse('./news/'+file) + root = tree.getroot() + tmp = root.findall('.//se') + return(len(tmp)) +def move(res,filename): + res_file = open(filename, 'w') + for item in res: + res_file.write(item+'\n') +f = [] +words = [] +p = './news'; +for (dirpath, dirnames, filenames) in walk(p): + f.extend(filenames) + break +for file in f: + words.append(file+'\t'+str(sent(file))) +move(words,'counted.txt') +import xml.etree.ElementTree as a +from os import walk +import pandas as q +def second(filename): + tree = a.parse('./news/'+file) + root = tree.getroot() + name = root.find(".//*[@name='author']") + topic = root.find(".//*[@name='topic']") + return(name.attrib['content']+":"+topic.attrib['content']) +f = [] +d = [] +p = './news'; +for (dirpath, dirnames, filenames) in walk(p): + f.extend(filenames) +for file in f: + tmp = second(file).split(':') + tmp_arr = [file,tmp[0],tmp[1]] + d.append(tmp_arr) +df = q.DataFrame(d,columns=["название","автор","тема"]) +df.to_csv("2.csv", sep=';', encoding='windows-1251') +import re +def main (): + with open('F.xml', 'r', encoding='utf-8') as f: + text=f.readlines() + return text +n=main () +m=len(n) +print (m) +import re +dic = {} +with open('f.xml') as f: + for row in f: + if(re.match(r'.*',row)): + arr = row.split("\"") + key = arr[3] + if key in dic: + dic[key]=dic[key]+1 + else: + dic[key] = 1; + for key in dic.items(): + print(key+" "+"\r\n") +import os +import shutil +folder='.' 
+print (os.listdir('.')) +for f in os.listdir('.'): + with open (os.path.join(folder, f)) as text: + print('file: ', f) +a=str_word_count(f, ' ') +filelist = [f for f in os.listdir('.') if os.path.isfile(f)] +if a>1: + print(filelist) +def open_format(crab): + a = [] + with open (crab.txt, 'r', encoding = 'utf-8') as f: + text = f.read() + text = re.sub('\.\.\.|[\.\?]', '!', text) + a = text.split('!')[:-1] + for i in range(len(a)): + a[i] = re.sub('[<>\*\.«»,\'\"]','', a[i]) + a[i] = a[i].strip() + return a +def repeat(): + work=open_format (crab.txt) + words=re.findall(r'([a-zA-Z]+(?:[?:[\'-][a-zA-Z]+)*)',s) + res=[] + for x in range (a,z): + res.append (x) + print (res) +import os +os.listdir('.') +file_tree=os.walk('.') +names = {} +for root, dirs, files in os.walk('.'): + for f in files: + name = f.split('.')[0] + if name not in names: + names[name]=1 + print(len(names)) +import random +def noun (): + file=open('Mnoun.txt', 'r', encoding='utf-8') + f=readlines() + nouns=[] + for line in f: + nouns.append(line.split(" ")) + return random.choise(nouns) +def verb (): + file=open ('verb1.txt', 'r', encoding='utf-8') + f=readlines() + verbs=[] + for line in f: + verbs.append(line.split(" ")) + return random.choise(verbs) +def adj (): + file=open ('adj.txt', 'r', encoding='utf-8') + f=readlines() + adjectives=[] + for line in f: + adjectives.append(line.split(" ")) + return random.choise(adjectives) +def noun2 (): + file=open ('noun2.txt','r', encoding='utf-8') + f=readlines() + plnouns=[] + for line in f: + plnouns.append(line.split(" ")) + return random.choise(plnouns) +def conj(): + conjs=["и", "или", "но", "да", "однако", "зато", "когда", "пока", "потому что", "чтобы", "то есть"] + return "," + random.choise(conjs) +def noun3 (): + file=open ('noun3.txt', 'r', encoding='utf-8') + f=readlines () + fnouns=[] + for line in f: + fnouns.append (line.split(" ")) + return random.choise(fnouns) +def 2verb (): + file=open ('2verb.txt', 'r', encoding ='utf-8') + 
f=readlines () + 2verbs=[] + for line in f: + 2verbs.append (line.split(" ")) + return random.choise (2verbs) +def sen (): + return (noun+" "+verb+" "+adj+" "+noun2+" "+conj+" "+noun3+" "+2verb+"." +for i in range(5): + print (sen()) +import re +def main(): + with open ('lemon.html', 'r', encoding='utf-8') as f: + text=f.read() + a='+?
Отряд:(.+?)
' + if re.search (a, text): + card = re.search(a, text).group() + b='Семейство(?:.|\n)*?

(.+?)' + if re.search(b, a): + с = re.search(b, a).group(1) + with open ('family.txt', 'a', encoding = 'utf-8') as f: + f.write(с) + else: + print('Family type not found.') + with open ('family.txt', 'a', encoding = 'utf-8') as f: + f.write('Family type not found.') + else: + print('Error!') + with open ('family.txt', 'a', encoding = 'utf-8') as f: + f.write('Error!') +import os +import re +def count_words(): + with open('answer1.txt', 'w', encoding='utf-8') as fout: + for root, dirs, files in os.walk('./news'): + for f in files: + count = 0 + with open(os.path.join(root, f), 'r') as fin: + f1 = fin.read().split() + for line in f1: + if '' in line: + count += 1 + fout.write('%s \t %d \n' %(f, count)) +def annot(): + with open('answer2.csv', 'w', encoding='utf-8') as fout: + fout.write('Название файла \t Автор \t Дата создания') + for root, dirs, files in os.walk('./news'): + for f in files: + with open(os.path.join(root, f), 'r') as fin: + f2 = fin.read() + nam = f + reg1 = '' + reg2 = '' + auth = re.search(reg1, f2).group(1) + date = re.search(reg2, f2).group(1) + fout.write('%s \t %s \t %s \n' %(f, auth, date)) +def bigramms(): + with open('answer3.txt', 'w', encoding='utf-8') as fout: + for root, dirs, files in os.walk('./news'): + for f in files: + with open(os.path.join(root, f), 'r') as fin: + f3 = fin.read().split('\n') + reg = '(.+?)' + for indx, sentence in enumerate(f3): + if '' in sentence: + f3[indx] = [re.search(reg, sentence).group(1), re.search(reg, sentence).group(2)] + else: + f3.remove(sentence) + temp = True + for indx, word in enumerate(f3): + try: + if 'A' in word[0]: + if 'gen' in word[0]: + if 'S' in f3[indx + 1][0]: + if 'gen' in f3[indx + 1][0]: + fout.write('%s %s \n' %(word[1], f3[indx + 1][1])) + except IndexError: + temp = False +def main(): + count_words() + annot() + bigramms() +if __name__ == '__main__': + main() +def done_text(): + f = open('ostin.txt', 'r', encoding='utf-8') + s = f.read().lower().split() + f.close() + for indx, 
word in enumerate(s): + s[indx] = word.strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><') + return s +def count_words(arr): + d = {} + for word in arr: + if word in d: + d[word] += 1 + else: + d[word] = 1 + return d +def count_letters(arr): + dic = {} + alpha = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя' + for letter in alpha: + dic[letter] = 0 + for word in arr: + if word and word[0] in dic: + dic[word[0]] += 1 + return dic +def count_pos(arr): + dic = {key:ind for ind, key in enumerate(arr)} + return dic +def create_antw(dic): + f = open('answer_keys2.tsv', 'w', encoding='UTF-8') + for key in sorted(dic): + f.write('{0}\t{1}\n'.format(key, str(dic[key]))) + f.close() +def main(): + textik = done_text() + create_antw(count_pos(textik)) +if __name__ == '__main__': + main() +my_num = 9 +your_num = int(input('Write a number from 1 to 10, please: ')) +if your_num == my_num: + print('You\'re lucky one :D') +else: + if your_num > my_num: + print('Your number is too big') + else: + print('Your number is too small') + your_num = int(input('Try again: ')) + if your_num == my_num: + print('You\'re lucky one :D') + else: + print('You\'re hopeless') +import re +def split_txt(): + f = open('test1.txt', 'r', encoding='UTF-8') + s = f.read() + s.replace('\n', ' ') + s1 = re.sub('(\?|!|\.\.\.|([а-яa-z.]+ [а-яa-zА-ЯA-Z]{2,}\.))', '\\1^', s) + print(s1) +def main(): + split_txt() +main() +import re +def find_space(fname): + f = open(fname, 'r', encoding='UTF-8') + s = f.read().split() + regex = '«[a-zA-ZА-Яа-я]+?-[0-9]' + wlist = re.findall(regex, s) + print(', '.join(wlist)) +def main(): + find_space('test.txt') +main() +coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя' +while True: + s = input('Введите текст: ') + if s == '': + break + s = s.split() + res = '' + if s[0] == 'decode': + s.pop(0) + s = ' '.join(s) + for letter in s: + if letter == '!': + res += ' ' + else: + for indx, i in enumerate(coinc): + if i == 
letter: + if i == 'A': + res += 'Z' + elif i == 'a': + res += 'z' + elif i == 'А': + res += 'Я' + elif i == 'а': + res += 'я' + else: + res += coinc[indx - 1] + else: + if s[0] == 'code': + s.pop(0) + s = ' '.join(s) + for letter in s: + if letter == ' ': + res += '!' + else: + for indx, i in enumerate(coinc): + if i == letter: + if i == 'Z': + res += 'A' + elif i == 'z': + res += 'a' + elif i == 'Я': + res += 'А' + elif i == 'я': + res += 'а' + else: + res += coinc[indx + 1] + print (res) +print('Программа завершила свою работу!') +words=[] +check = True +while check is True: + inp = input("Введите слово: ") + if inp == "": + check = False + else: + words.append(inp) +for indx in range(len(words) - 1, -1, -1): + print(words[indx]) +check = True +while check is True: + s = input("Введите текст: ") + if s == "": + check = False + else: + res = "" + for letter in s: + if ord(letter) == 90: + res += chr(65) + elif ord(letter) == 122: + res += chr(97) + elif ord(letter) == 1071: + res += chr(1040) + elif ord(letter) == 1103: + res += chr(1072) + else: + res += chr(ord(letter) + 1) + print (res) +print("Программа завершила работу") +names = ['Оля','Маша','Коля','Костя','Нина','Ира'] +surnames=['Кузнецова', 'Сидорова', 'Семенов', 'Иванов', 'Илларионова'] +if len(names) >= len(surnames): + for i in range(len(surnames)): + strng = names[i] + ' ' + surnames[i] + print(strng) + check = len(surnames) + while check < len(names): + print(names[check]) + check += 1 +else: + for i in range(len(names)): + strng = names[i] + ' ' + surnames[i] + print(strng) + check = len(names) + while check < len(surnames): + print(surnames[check]) + check += 1 +def doneText(fname): + f = open(fname, 'r') + s = f.read().split(' ') + for indx, word in enumerate(s): + s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"') + f.close + return s +fnm = input('Введите имя файла: ') +arr = doneText(fnm) +print('Количество слов в тексте = {0}'.format(arr.len())) +my_num = 9 +check = False +while (check == 
False): + try: + your_num = int(input('Write a number from 1 to 10, please: ')) + except ValueError: + print("It's not a number, I'm out") + break + if your_num == my_num: + print('You\'re lucky one :D') + check = True + else: + if your_num > my_num: + print('Your number is too big') + else: + print('Your number is too small') +print("End of programme") +check = True +while check == True: + word = input('Write a word in cyrillic: ') + if word == "": + check = False + print("Empty word, I'm out") + else: + if word.endswith('о') or word.endswith('н') or word.endswith('р'): + print('Possible forms: Nom. Sg. / Acc. Sg.') + elif word.endswith('а'): + print('Possible forms: Gen. Sg. / Nom. Pl. / Acc. Pl.') + elif word.endswith('у'): + print('Possible forms: Dat. Sg.') + elif word.endswith('ом'): + print('Possible forms: Instrum. Sg. / Nom. Sg.') + elif word.endswith('е'): + print('Possible forms: Prep. Sg.') + elif word.endswith('ам'): + print('Possible forms: Dat. Pl.') + elif word.endswith('ами'): + print('Possible forms: Instrum. Pl.') + elif word.endswith('ах'): + print('Possible forms: Prep. Pl.') + elif word.endswith('и'): + print('Possible forms: Nom. Pl.') + else: + print('Possible forms: Gen. Pl.') +print('Thanks for using!') +import os +import re +def rem_dir(name_dir): + for root, dirs, files in os.walk('.', topdown=False): + if re.findall(os.sep + name_dir, root): + for f in files: + os.remove(os.path.join(root, f)) + for d in dirs: + os.rmdir(os.path.join(root, d)) + for d in dirs: + if name_dir == d: + os.rmdir(os.path.join(root, d)) +def print_root(): + s = '--' + for root, dirs, files in os.walk('.'): + print (s + root) + if len(dirs): + s = '\t' + s + for f in files: + print('\t{0}'.format(f)) +def main(): + rem_dir('wrong') + print_root() +if __name__ == '__main__': + main() +word = input('Write a word in cyrillic: ') +if word.endswith('о'): + print('Possible forms: Nom. Sg. / Acc. Sg.') +elif word.endswith('а'): + print('Possible forms: Gen. Sg. / Nom. 
Pl. / Acc. Pl.') +elif word.endswith('у'): + print('Possible forms: Dat. Sg.') +elif word.endswith('ом'): + print('Possible forms: Instrum. Sg.') +elif word.endswith('е'): + print('Possible forms: Prep. Sg.') +elif word.endswith('ам'): + print('Possible forms: Dat. Pl.') +elif word.endswith('ами'): + print('Possible forms: Instrum. Pl.') +elif word.endswith('ах'): + print('Possible forms: Prep. Pl.') +else: + print('Possible forms: Gen. Pl.') +import re +def done_text(fname): + f = open(fname, 'r', encoding='UTF-8') + s = f.read().split() + for indx, word in enumerate(s): + s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»”“><') + f.close + return s +def print_words(s): + regex = '.*[ауоиыеюя].*[ауоиыеюя].*[ауоиыеюя].*' + for word in s: + if re.search(regex, word): + print(word) +def main(): + textik = done_text(input('Введите имя файла с расширением: ')) + print_words(textik) +main() +import os +import re +import math +from math import log +PUNCT = '[.,!«»?&@"$\[\]\(\):;% +def preprocessing(text): + text_wo_punct = re.sub(PUNCT, '', text.lower()) + words = text_wo_punct.strip().split() + return words +def count_tf(word, text): + return text.count(word) / len(text) +def count_df(word, texts): + n = [1 for text in texts if word in text] + return sum(n) +def count_idf(word, texts): + n = len(texts) / (1 + count_df(word, texts)) + return n +def count_tfidf(word, text, texts): + tf = count_tf(word, text) + idf = count_idf(word, texts) + return log(tf, 10) * log(idf, 10) +def get_texts(): + texts_dic = {} + for root, dirs, files in os.walk('wikipedia'): + for f in files[:50]: + with open(os.path.join(root, f), 'r', encoding='utf-8') as t: + text = preprocessing(t.read()) + texts_dic[f.split('.')[0]] = text + texts = list(texts_dic.values()) + return texts_dic, texts +def fin_output(texts_dic, texts): + for text in texts_dic: + print("Top words in document {}".format(text)) + scores = {} + for word in texts_dic[text]: + scores[word] = count_tfidf(word, 
texts_dic[text], texts) + sorted_words = sorted(scores.items(), key=lambda x: x[1]) + for word, score in sorted_words[:5]: + print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5))) +def main(): + a = get_texts() + fin_output(a[0], a[1]) +if __name__ == '__main__': + main() +check = True +coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя' +while check is True: + s = input('Введите текст: ') + if s == '': + check = False + else: + res = '' + for letter in s: + if letter == ' ': + res += ' ' + else: + for indx, i in enumerate(coinc): + if i == letter: + if i == 'A': + res += 'Z' + elif i == 'a': + res += 'z' + elif i == 'А': + res += 'Я' + elif i == 'а': + res += 'я' + else: + res += coinc[indx - 1] + print (res) +print('Программа завершила свою работу!') +def done_text(fname): + f = open(fname, 'r') + s = f.read().split() + for indx, word in enumerate(s): + s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»”“><') + f.close + return s +def freq_dic(arr): + dic = {} + for word in arr: + if word not in dic: + dic[word] = 1 + else: + dic[word] += 1 + return dic +def print_dic(dic): + for word in dic: + if dic[word] >= 10: + print(word, dic[word]) +def main(): + my_text = done_text(input('Введите имя файла с расшриением: ')) + print_dic(freq_dic(my_text)) +main() +def done_text(fname): + f = open(fname, 'r') + s = f.read().split() + for indx, word in enumerate(s): + s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><') + f.close + return s +def count_syll(arr, n): + res = [] + voc = 'аоуыиеёюя' + for word in arr: + num = 0 + for letter in word: + if letter in voc: + num += 1 + if num == n: + res.append(word) + return res +def first_letter(arr, letter): + res = [] + for word in arr: + if word.startswith(letter): + res.append(word) + return res +def choice(): + fnm = input('Введите имя файла: ') + textik = done_text(fnm) + make_choice = input('Если хотите, чтобы 
программа считала слоги, введите syllables; иначе - letter: ') + if make_choice == 'syllables': + numb = int(input('Введите количество слогов в словах: ')) + print(' '.join(count_syll(textik, numb))) + else: + lett = input('Введите желаемую первую букву: ') + print(' '.join(first_letter(textik, lett))) +def main(): + choice() +main() +import os +def mk_ppk(s): + s = s.split() + pth = '.' + for word in s: + pth += os.sep + word + if not os.path.exists(pth): + os.makedirs(pth) +def mk_fls(num): + pth = '.' + for ppk in range(num): + pth += os.sep + str(ppk+1) + if not os.path.exists(pth): + os.makedirs(pth) + for pp_quant in range(ppk+1): + f = open(pth + os.sep + str(pp_quant + 1) + '.txt', 'w') + f.close() + pth = '.' +def prnt_dir(nm_dir): + for fl in os.listdir(nm_dir): + if os.path.isdir(fl): + print(fl) +def main(): + mk_ppk(input('Введите приложение: ')) + mk_fls(int(input('Введите число: '))) + prnt_dir('.') +if __name__ == '__main__': + main() +my_num = 9 +check = False +your_num = int(input('Write a number from 1 to 10, please: ')) +while(your_num != my_num): + if your_num > my_num: + print('Your number is too big') + else: + print('Your number is too small') + try: + your_num = int(input('Try again: ')) + except ValueError: + print("Not a number") + check = True + break +if check == True: + print("See you next time") +else: + print("You're right") +check = True +coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя' +while check is True: + s = input('Введите текст: ') + if s == '': + check = False + else: + res = '' + for letter in s: + if letter == ' ': + res += ' ' + else: + for indx, i in enumerate(coinc): + if i == letter: + if i == 'Z': + res += 'A' + elif i == 'z': + res += 'a' + elif i == 'Я': + res += 'А' + elif i == 'я': + res += 'а' + else: + res += coinc[indx + 1] + print (res) +print('Программа завершила свою работу!') +f = open('freq_crlf.txt', 'r', encoding='utf-8') +s 
= f.read().split('\n') +f.close() +for line in s: + line = line.split(' | ') + if line[1] == 'союз': + print(' | '.join(line)) +f = open('freq_crlf.txt', 'r', encoding='utf-8') +s = f.read().split('\n') +f.close() +arr = [] +while True: + word = input('Введите слово: ') + if word == '': + print('Результаты:') + break + else: + arr.append(word) +for word in arr: + check = False + for line in s: + line = line.split(' | ') + if word == line[0]: + print(' | '.join(line)) + check = True + if check is False: + print(u'{0}: Такого слова в словаре нет.'.format(word)) +print('Завершение работы программы')f = open('freq_crlf.txt', 'r', encoding='utf-8') +s = f.read().split('\n') +f.close() +while True: + word = input('Введите слово: ') + if word == '': + print('Завершение работы программы') + break + else: + check = False + for line in s: + line = line.split(' | ') + if word == line[0]: + print(' | '.join(line)) + check = True + if check is False: + print('Такого слова в словаре нет.') +import decimal +f = open('freq_crlf.txt', 'r', encoding='utf-8') +s = f.read().split('\n') +f.close() +ress = '' +ipm_sum = 0 +for line in s: + line = line.split(' | ') + if line[1].find('ед жен') != -1: + ress += line[0] + ress += ', ' + ipm_sum += decimal.Decimal(line[2]) +print(ress) +print(u'Суммарное значение ipm = {0}'.format(ipm_sum))import re +def open_file(): + f = open('islandic.xml', 'r', encoding='UTF-8') + s = f.read() + f.close() + return s +def count_lines(): + s = open_file() + s = s.split('\n') + f = open('answer_length.txt', 'w', encoding='UTF-8') + f.write(str(len(s))) + f.close() +def my_diction(arr): + dix = {} + for word in arr: + if word in dix: + dix[word] += 1 + else: + dix[word] = 1 + return dix +def create_diction(): + s = open_file() + regex = '' + arr = re.findall(regex, s) + dix = my_diction(arr) + f = open('answer_keys.txt', 'w', encoding='UTF-8') + f.write('Отсортированный список морфологических разборов:\n') + for key in sorted(dix): + f.write(key + '\n') + 
f.close() +def count_adj(): + s = open_file() + regex = '' + arr = re.findall(regex, s) + dix = my_diction(arr) + f = open('answer_adj.txt', 'w', encoding='UTF-8') + for key in sorted(dix): + f.write(key + ' ' + str(dix[key]) + '\n') + f.close() +def create_csv(): + s = open_file() + print(s) + regex1 = '(.+?)' + regex2 = '<.+?>\n' + s = re.sub(regex1, '\\1, \\2, \\3', s) + s = re.sub(regex2, '', s) + s = re.sub('( )+?', '', s) + s = s.split('\n') + f = open('answer_dict.csv', 'w', encoding='UTF-8') + for line in s: + f.write(line + '\n') + f.close() +def main(): + count_lines() + create_diction() + count_adj() + create_csv() +if __name__ == '__main__': + main() +check = True +words = [] +while check is True: + s = input("Введите слово: ") + if s == "": + check = False + else: + temp = [] + for letter in s: + temp.append(letter) + words.append(temp) +for wrd in words: + for letterindx in range(2, len(wrd), 2): + if letterindx >= len(wrd): + break + wrd.pop(letterindx) + s = "" + for letterindx in range(len(wrd) - 1, -1, -1): + s += wrd[letterindx] + print(s)f = open('input.txt', 'r', encoding='UTF-8') +s = f.read().split('\n') +f.close() +avgsum = 0 +for indx, line in enumerate(s): + s[indx] = line.split() + avgsum += len(s[indx]) +print(u'Среднее количество слов в строке = {0}'.format(avgsum / len(s)))import re +word = input("Введите слово на кириллице: ") +pattern1 = r'[А-Яа-я]' +pattern2 = r'[1-9A-Za-z]' +if re.match(pattern1, word) and re.search(pattern2, word) is None: + for indx, letter in enumerate(word): + if indx % 2 != 0: + if letter != "а" and letter != "к": + print(letter) +else: + print("Вводить можно только кириллицу :Р")temp = False +while (temp == False): + try: + a = float(input('Введите первое число (a) ')) + b = float(input('Введите второе число (b) ')) + c = float(input('Введите третье число (c) ')) + temp = True + except (TypeError, ValueError): + print('Просила же только числа вводить!') +if a + b == c: + print('Поздравляю! 
a + b = c') +else: + print('Прошу прощения, но a + b != c') +if a*c + b == 0: + print('Поздравляю! a*c + b = 0') +else: + print('Прошу прощения, но a*c + b != 0')import random +def ask_name(): + return input('Введите имя файла с расширением: ') +def get_words(): + f = open(ask_name(), 'r', encoding='UTF-8') + s = f.read().split('\n') + f.close() + dic = {} + for ln in s: + temp = ln.split(',') + dic[temp[0]] = temp[1:] + return dic +def guess_word(word): + num = len(word) + if num <= 4: + print('У вас {0} попытки'.format(num)) + else: + print('У вас {0} попыток'.format(num)) + while num > 0: + temp = input('Введите слово: ') + if temp == word: + print('Вы угадали!') + break + else: + print('Попробуйте еще раз!') + num -= 1 + if num == 0: + print('Повезет в другой раз!') +def game(d): + num_check = 0 + for k in d: + print('Подсказка! {0} ...'.format(random.choice(d[k]))) + guess_word(k) + num_check += 1 + if num_check == len(d): + print('Это было последнее слово. Приходите еще') + break + ask = input('Хотите попробовать еще раз? 
Введите только "да" или "нет": ') + if ask == 'нет': + break +def main(): + d = get_words() + game(d) +main()def done_text(fname): + f = open(fname, 'r') + s = f.read().split() + for indx, word in enumerate(s): + s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><') + f.close + return s +def get_fname(): + return input("Введите имя файла с расширением: ") +def count_ing(arr): + res = 0 + for word in arr: + if word.endswith('ing'): + res += 1 + return res +def count_form(arr, form): + res = 0 + for word in arr: + if word == form: + res += 1 + return res +def main(): + textik = done_text(get_fname()) + print('Всего в тексте {0} форм на -ing'.format(count_ing(textik))) + form = input('Введите форму, количество вхождений которой хотите найти: ') + print('Эта форма встречается {0} раз'.format(count_form(textik, form))) +main()import re +def search_inf(fname): + f = open(fname, 'r', encoding='UTF-8') + s = f.read() + f.close() + regex = '>Столица.*?([А-Яа-я]+(-[А-Яа-я]+)*)' + res = re.search(regex, s, re.DOTALL) + if res: + k = open('answer.txt', 'w', encoding='UTF-8') + print(res.group(1)) + k.write(res.group(1)) + k.close() +def main(): + search_inf(input('Введите имя файла: ')) +if __name__ == '__main__': + main()import re +def change_text(): + f = open('mosq1.txt', 'r', encoding='UTF-8') + s = f.read() + f.close() + s1 = re.sub('Комар(»| |а|ы|у|ом|е|ов|ам|ами|ах)', 'Слон\\1', s) + s1 = re.sub('комар(»| |а|ы|у|ом|е|ов|ам|ами|ах)', 'слон\\1', s1) + f = open('antwort.txt', 'w', encoding='UTF-8') + f.write(s1) + f.close() +def main(): + change_text() +if __name__ == '__main__': + main()import random +def generate_adj(): + f = open('adj.txt', 'r') + s = f.read().split() + f.close() + return random.choice(s) +def generate_noun(num): + if num == 'sg': + f_name = 'noun_sg.txt' + else: + f_name = 'noun_pl.txt' + f = open(f_name, 'r') + s = f.read().split() + f.close() + return random.choice(s) +def generate_verb(): + f = open('verbs.txt', 'r') + s = 
f.read().split() + f.close() + return random.choice(s) +def generate_punct(pos): + if pos == 'end': + f_name = 'end_punct.txt' + else: + f_name = 'mid_punct.txt' + f = open(f_name, 'r') + s = f.read().split() + f.close() + punct = random.choice(s) + if punct == '-': + punct = ' ' + punct + return punct +def generate_pronoun(): + f = open('pronouns.txt', 'r') + s = f.read().split() + f.close() + return random.choice(s) +def generate_intj(): + f = open('intj.txt', 'r') + s = f.read().split('\n') + f.close() + return random.choice(s) +def generate_line(num): + if num == 1: + return generate_adj() + ' ' + generate_noun('sg') + generate_punct('end') + '\n' + elif num == 2: + return generate_verb() + ' ' + generate_noun('pl') + ' и' + '\n' + else: + return generate_pronoun() + generate_punct('mid') + ' ' + generate_intj() + generate_punct('end') + '\n' +def generate_haiku(): + return generate_line(1) + generate_line(2) + generate_line(3) +print(generate_haiku())import re +def done_text(fname): + f = open(fname, 'r', encoding='UTF-8') + s = f.read().lower() + rez1 = '(,|:|№|-|\*|/|\||\[|\]|{|}|\\|(|)|\'|"|[0-9]|«|»|>|<|V|I|X)+' + s = re.sub(rez1, ' ', s) + rez = '\.|\?|!|\.\.\.' 
+ s = re.split(rez, s) + f.close() + for indx, sent in enumerate(s): + s[indx] = sent.split() + if len(s[indx]) == 0: + s.pop(indx) + return s +def count_letters(arr): + mlist = [(indx + 1, word, len(word)) for indx, senten in enumerate(arr) for word in senten if len(word) >= 7] + f = open('answer_sheet12.txt', 'w', encoding='UTF-8') + for k in mlist: + f.write('предложение {0}, {1}-------{2}\n'.format(k[0], k[1], k[2])) + f.close() +def main(): + count_letters(done_text('tolstoy.txt')) +if __name__ == '__main__': + main()import os +import re +def count_dirs(): + res = '[0-9]' + arr = [thing for thing in os.listdir('.') if os.path.isdir(thing) and len(re.findall(res, thing))] + return arr +def print_answer(arr): + fout = open('answer_sheet13.txt', 'w', encoding='UTF-8') + fout.write('Всего папок с цифрами в названии - {0}.'.format(str(len(arr)))) + fout.write('Все имена в директории (без повторений):\n') + clear_names = [] + for thing in os.listdir('.'): + temp = thing + if os.path.isfile(thing): + temp = re.sub('\..+', '', thing) + if temp not in clear_names: + clear_names.append(temp) + for nme in clear_names: + if nme: + fout.write(nme + '\n') + fout.close() +def main(): + print_answer(count_dirs()) +if __name__ == '__main__': + main()import os +def count_dep(): + count = 0 + for root, dirs, files in os.walk('.', topdown=False): + if len(root.split(os.sep)) - 1 > count: + count = len(root.split(os.sep)) - 1 + with open('answer_sheet14.txt', 'w', encoding='UTF-8') as answer: + answer.write(str(count)) +def main(): + count_dep() +if __name__ == '__main__': + main()while True: + s = input("Введите строку: ") + if s == "": + break + for indx, part in enumerate(s): + print(s[:len(s) - indx])s = input("Введите строку: ") +for indx, part in enumerate(s): + print(s[:len(s) - indx])import re +def ask_name(): + return input('Введите имя файла с расширением: ') +def get_words(): + f = open(ask_name(), 'r', encoding='UTF-8') + s = f.read().split() + for indx, word in 
enumerate(s): + s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><') + f.close + return s +def count_words(words): + regex = 'откр(ы|о)((т(ый|ая|ое|ые|ого|ой|ых|ому|ым|ую|ом|ою)|в(ш(ий?|ая|ее|ие|его|ей|их|ему|им|ую|ею))?)|(л(а|о|и)?)|(й(те)?)|(ют?|е(шь|м|те?)))(ся|сь)?' + wlist = [] + for word in words: + if re.fullmatch(regex, word): + if word not in wlist: + wlist.append(word) + return wlist +def main(): + print(', '.join(count_words(get_words()))) +main()word = input('Введите слово: ') +if word == '': + print ('Слово не введено') +word2 = '' +for i in range(len(word)): + for k in range(len(word)): + if k + i < len(word): + word2 += word[k + i] + else: + word2 += word[k + i - len(word)] + print (word2) + word2 = '' +import re +def open_and_edit(): + f = open("verbs.txt", 'r', encoding = "utf-8") + s = f.read() + f.close() + s1 = s.lower() + a = s1.split() + for i, word in enumerate(a): + a[i] = word.strip('.,!?();:*/\|<>-_%& + return a +def find_and_print(a): + arr = [] + for word in a: + if re.search('^программир((у(ю(т|щ(и(й|ми?|е|х)|е(го|му?|й)|ая|ую))?|я|е(шь|те?))|ова(л(а|и)?|ть))(с(я|ь))?|уем(ы(й|ми?|е|х)?|о(го|му?|й)|ая?|ую))', word): + if word not in arr: + arr.append(word) + for verb in arr: + print(verb) +def main(): + text = open_and_edit() + find_and_print(text) +main() +import re +import os +def task1(): + for root, dirs, files in os.walk('.\\news'): + s = '' + for f in files: + file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251") + text = file.readlines() + words = 0 + for line in text: + reg = '' + r = re.search(reg,line) + if r: + words += 1 + s += f + '\t' + str(words) + '\n' + f2 = open("words_in_files.txt", 'w', encoding = "utf-8") + f2.write(s) +def task2(): + for root, dirs, files in os.walk('.\\news'): + s = '' + for f in files: + file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251") + text = file.readlines() + author = '' + date = '' + for line in text: + reg_author = 'content="([ 
|(а-яА-яa-zA-Z)]+)" name="author"' + reg_date = 'content="([0-9]+\.[0-9]+\.[0-9]+)" name="created"' + r1 = re.search(reg_author, line) + if r1: + author = r1.group(1) + r2 = re.search(reg_date, line) + if r2: + date = r2.group(1) + s += f + '\t' + author + '\t' + date + '\n' + f3 = open("words_in_files.csv", 'w', encoding = "utf-8") + f3.write(s) +def task3(): + for root, dirs, files in os.walk('.\\news'): + s = '' + for f in files: + file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251") + text = file.readlines() + for i, line in enumerate(text): + reg_adj = 'A=.+gen.+>?' + reg_sumj = 'S,.+gen.+>?' + reg_word = '([а-яА-Я]+|`)\n' + r1 = re.search(reg_adj, line) + if r1: + r2 = re.search(reg_word, line) + word1 = r2.group(1) + r3 = re.search(reg_subj, text[i+1]) + if r3: + word2 = r3.group(1) +def main(): + task1() + task2() +main() +import re +def open_and_edit(): + f = open("linguistics.txt", 'r', encoding = "utf-8") + s = f.read() + f.close() + return s +def replace_and_output(s): + s1 = re.sub('язык([а-я]{,3}( |\.|,|\)))','шашлык\\1', s) + s2 = re.sub('Язык([а-я]{,3}( |\.|,|\)))','Шашлык\\1', s1) + f = open("shashlyk.txt", 'w', encoding = "utf-8") + f.write(s2) + print('Текст записан в файл shashlyk.txt') + f.close() +def main(): + text = open_and_edit() + replace_and_output(text) +main() +import os +import re +def count_folders(): + result = 0 + for f in os.listdir('.'): + if os.path.isdir(f): + if re.search('^([а-яА-Я]| )+$',f): + result += 1 + print('Найдено папок:',result) +def print_names(): + names = {} + file_name = '^(.+)(\.[a-z]+)$' + for f in os.listdir('.'): + if os.path.isdir(f): + if f not in names: + names[f] = 1 + if os.path.isfile(f): + r = re.search(file_name,f) + if r: + name = r.group(1) + if name not in names: + names[name] = 1 + for name in sorted(names): + print(name) +def main(): + count_folders() + print_names() +main() +def read_file(): + f = open("words.csv", 'r', encoding = "utf-8") + a = f.readlines() + f.close() + 
return(a) +def make_dict(a): + words = {} + for line in a: + a2 = line.split(';') + for i, h in enumerate(a2): + a2[i] = h.strip() + words[a2[1]] = a2[0] + return words +def guess(dic): + for noun in dic: + print(dic[noun], '...') + attempt = 0 + while attempt != len(dic[noun]): + print('Осталось попыток: ', len(dic[noun]) - attempt ) + attempt += 1 + if input() == noun: + print('Маладэц!') + attempt = len(dic[noun]) + elif len(dic[noun]) - attempt == 0: + print('Не угадал :(') +def main(): + text = read_file() + words = make_dict(text) + print(words) + guess(words) +main() +import re +def open_and_edit(): + f = open("hse.html", 'r', encoding = "utf-8") + s = f.read() + f.close() + return s +def find_and_print(s): + reg1 = 'Преподаватели.*?\n.*?\n

[0-9]+ ?[0-9]+' + reg2 = 'Преподаватели.*?\n.*?\n

' + res1 = re.findall(reg1,s) + res2 = re.findall(reg2,s) + number = res1[0].replace(res2[0], '') + print('Число преподавателей:',number) + f = open("found_number.txt", 'w', encoding = "utf-8") + f.write(number) + f.close() +def main(): + text = open_and_edit() + find_and_print(text) +main() +words = [] +while True: + newword = input('Введите слово: ') + if newword == '': + break + else: + words.append(newword) +for i in range(len(words)): + string = words[i] + if (i+1) >= len(string): + print('В этом слове не осталось символов') + else: + print(string[i+1:]) +a = input ('Введите число a: ') +b = input ('Введите число b: ') +c = input ('Введите число c: ') +a = int (a) +b = int (b) +c = int (c) +if a % b == c: + print ('a дает остаток c при делении на b') +else: + print ('a не дает остаток c при делении на b') +if a * c + b == 0: + print ('c является решением линейного уравнения ax + b = 0') +else: + print ('c не является решением линейного уравнения ax + b = 0') +import random + +def open_file(): + f = open("words.txt", 'r', encoding = "utf-8") + text = f.readlines() + f.close() + return(text) + +def find_words(word,text): + for i in range(len(text)): + line = [] + line = text[i].split() + for l, w in enumerate(line): + line[l] = w.strip('.,!?();:*/\|<>-_%& + if line[0] == word: + words = [] + for j in range(len(line)): + if j > 0: + words.append(line[j]) + return(words) + +def noun(): + find = 'существительное' + nouns = find_words(find, text) + return random.choice(nouns) + +def imperative(): + find = 'императив' + imper = find_words(find, text) + return random.choice(imper) + +def adverb(imp): + find = 'наречие' + adverbs = find_words(find, text) + return random.choice(adverbs) + ' ' + imp + +def verb(): + find = 'глагол' + verbs = find_words(find, text) + return random.choice(verbs) + +def adjective(): + find = 'прилагательное' + adj = find_words(find, text) + return random.choice(adj) + +def question_word(): + find = 'вопрос' + quest = find_words(find, text) 
+ return random.choice(quest) + +def pos_sentence(): + sentence = adjective() + ' ' + noun() + ' ' + verb() +\ + ' ' + adjective() + ' ' + noun() + '.' + sentence = sentence.capitalize() + return(sentence) + +def neg_sentence(): + sentence = adjective() + ' ' + noun() + ' не ' + verb() +\ + ' ' + adjective() + ' ' + noun() + '.' + sentence = sentence.capitalize() + return(sentence) + +def quest_sentence(): + sentence = question_word()+ ' ' + adjective() + ' ' + noun() +\ + ' ' + verb() + ' ' + adjective() + ' ' + noun() + '?' + sentence = sentence.capitalize() + return(sentence) + +def imper_sentence(): + sentence = adverb(imperative()) + ' ' + noun() + '!' + sentence = sentence.capitalize() + return(sentence) + +def if_sentence(): + sentence = 'если бы ' + noun() + ' ' + verb() + ' ' + noun() +\ + ', то ' + noun() + ' ' + verb() + ' бы ' + noun() + '.' + sentence = sentence.capitalize() + return(sentence) + +def random_print(): + spisok = [pos_sentence(), neg_sentence(), quest_sentence(),\ + imper_sentence(), if_sentence()] + random.shuffle(spisok) + for i in range(len(spisok)): + print(spisok[i], end = ' ') +text = open_file() +random_print() +word = input ('Введите слово: ') +indx = len(word)-1 +while indx >= 0: + if (word[indx]!= 'я') & (word[indx]!= 'з') : + print (word[indx]) + indx -= 1 +import os +import re +def extensions(): + ext_count = {} + for root, dirs, files in os.walk('.'): + for file in files: + ext = re.findall('\.[a-z0-9A-Z]+$', file) + if ext[0] not in ext_count: + ext_count[ext[0]] = 1 + else: + ext_count[ext[0]] += 1 + numb = 0 + found_ext = '' + for ext in ext_count: + if ext_count[ext] > numb: + numb = ext_count[ext] + found_ext = ext + print(found_ext) +def main(): + extensions() +main() +import re +def open_and_read(): + f = open("animal_farm.txt", 'r', encoding = "utf-8") + s = f.read() + f.close() + return s +def read_sentences(s): + s1 = re.sub('[a-z](\.|!|\?)','\\1@@', s) + a = s1.split('@@') + return a +def split_and_count(a): + for 
i in range(len(a)): + words = a[i].split() + words2 = [words[j].strip('.,!?();:*/\|<>-_%& + for word in range(len(words2)): + print('%s_%s' %(words2[word], len(words2[word]))) +def main(): + text = open_and_read() + sent = read_sentences(text) + split_and_count(sent) +main() +f = open("1.txt", 'r', encoding = "utf-8") +word1 = 0 +word3 = 0 +for line in f: + words = line.split() + for i in range(len(words)): + if len(words[i]) == 3: + word3 += 1 + elif len(words[i]) == 1: + word1 += 1 + words = [] +if word1 == 0: + print("Нет слов длинны 1") +else: + print(float(word3)/float(word1)) +def open_and_edit(): + name = input('Введите название файла: ') + f = open(name, 'r', encoding = "utf-8") + s = f.read() + f.close() + s1 = s.lower() + a = s1.split() + for i, word in enumerate(a): + a[i] = word.strip('.,!?();:*/\|<>-_%& + return a +def find_in_text(t): + hood = [] + for word in t: + if word.endswith('hood'): + hood.append(word) + print('В тексте нашлось ', len(hood), ' существительных с суффиксом -hood') + return hood +def short_list(arr): + short = [] + arr2 = [] + for k in arr: + arr2.append(k) + for i in range(len(arr2)-1): + if arr2[i]: + short.append(arr2[i]) + x = 1 + for j in range(i+1, len(arr2)): + if arr2[i]: + if arr2[i] == arr2[j]: + x += 1 + arr2[j] = [] + short.append(x) + return short +def min_freq(arr): + short = short_list(arr) + min = short[1] + index = 1 + for k in range(1, len(short), 2): + if short[k] < min: + index = k + min = short[k] + print('Минимальную частотность имеет существительное', short[index-1]) +def print_nouns(arr): + nouns = [] + short = short_list(arr) + for word in short: + if type(word) != int: + nouns.append(word.replace('hood', '')) + all_nouns = ', '.join(nouns) + print('Найденный слова образованы от существительных ', all_nouns) +def main(): + text = open_and_edit() + found = find_in_text(text) + min_freq(found) + print_nouns(found) +main() +f = open("aphor.txt", 'r', encoding = "utf-8") +a = f.readlines() +f.close() + +for i 
in range(len(a)): + words = [] + words = a[i].split() + numb = 0 + for j in range(len(words)): + if words[j] != '—': + numb += 1 + if numb < 16: + print(a[i]) + +author = [] +um = 0 +for i in range(len(a)): + words = [] + words = a[i].split() + for l, word in enumerate(words): + words[l] = word.strip('.,!?();:*/\|<>-_%& + for j in range(len(words)): + if words[j] == 'ум': + um += 1 + povtor = 0 + for k in range(len(author)): + if author[k] == words[len(words)-1]: + povtor += 1 + if povtor == 0: + author.append(words[len(words)-1]) +out = '' +out = ', '.join(author) +print('Количество цитат = ', um) +print('Источники: ', out) + +inp_words = [] +while True: + newword = input('Введите слово: ') + if newword == '': + break + else: + inp_words.append(newword) +for j in range(len(inp_words)): + found = 0 + print(inp_words[j]) + for i in range(len(a)): + words = [] + words = a[i].split() + for l, word in enumerate(words): + words[l] = word.strip('.,!?();:*/\|<>-_%& + for k in range(len(words)): + if inp_words[j] == words[k]: + print(a[i]) + found += 1 + break + if found == 0: + print('Цитата с этим словом не найдена') +import re +def open_file(): + f = open("file.txt", 'r', encoding = "utf-8") + a = [] + for line in f: + a.append(line) + f.close() + return a +def open_new_file(): + f = open("created_file.txt", 'w', encoding = "utf-8") + return f +def write_lines_number(a,f): + i = 0 + for line in a: + i += 1 + f.write(str(i)) + f.write('\n') +def create_dictionary(a,f): + dic = {} + for line in a: + if '= 5:\n')) + while n < 5: + n = int(input('Введённое число < 5. Пожалуйста, введите число >= 5:\n')) + return n +def func3(dic, n): + for i in range(n): + key = random.choice(list(dic.keys())) + m = 3 + print(i + 1, '-ое слово. ', 'Подсказка: ', random.choice(dic[key]), ' ...', sep = '') + fl = 0 + while fl != 1 and m != 0: + print('Попыток осталось: ', m, sep = '') + if input('Введите ниже ваш ответ:\n').lower() == key: + fl = 1 + print('Молодец! 
Всё верно!') + else: + print('Неверно. ', end = '') + if m != 1: + print('Ещё одна подсказка: ', random.choice(dic[key]), ' ...', sep = '') + m -= 1 + if fl == 0: + print('Вы не угадали. Правильный ответ: ', key, sep = '') +def main(): + dic = func1(input('Введите, пожалуйста, название файла:\n')) + n = func2() + func3(dic, n) +main() + + + +def func1(name): + + + + + f = open(name, 'r', encoding = 'utf-8', errors = 'ignore') + words = f.read().replace('\n', ' ').split() + f.close() + for i, word in enumerate(words): + words[i] = word.lower().strip('.”“,/1234567890@ + return words + +def func3(words, word): + fl = 0 + num = 0 + while fl != 1: + + try: + ind = words.index(word) + except ValueError: + fl = 1 + continue + num += 1 + words.pop(ind) + print(word, ': frequency = ', num, sep = '') + return words +def func2(words): + prefix = 'omni' + + length = len(prefix) + for word in words: + if word.startswith(prefix) and length < len(word): + words = func3(words, word) + words = func3(words, word[length:]) + print('-------------------------------------') +def main(): + func2(func1('file.txt')) +main() + + + + +def func1(name): + + + + + f = open(name, 'r', encoding = 'utf-8', errors = 'ignore') + words = f.read().replace('\n', ' ').split() + f.close() + for i, word in enumerate(words): + words[i] = word.lower().strip('.”“,/1234567890@ + return words + +def func3(words, word): + num = 0 + for elem in words: + if elem == word: + num += 1 + print(word, ': frequency = ', num, sep = '') +def func2(words): + prefix = 'under' + + length = len(prefix) + l = [] + for word in words: + if word.startswith(prefix) and length < len(word) and word not in l: + func3(words, word) + func3(words, word[length:]) + print('-------------------------------------') + l.append(word) +def main(): + func2(func1('file.txt')) +main() + + +import os +def files_and_folders(): + lst = os.listdir('.') + files = [] + folders = [] + for f in lst: + if os.path.isfile(f): + files.append(f) + else: + 
folders.append(f) + d_files = {} + for f in files: + f_name, f_ext = os.path.splitext(f) + if f_name not in d_files: + d_files[f_name] = 1 + else: + d_files[f_name] += 1 + + return d_files, folders +def counting(d_files): + num = 0 + punct_marks = '.!?:;,-()"\'<>' + + for key in d_files: + fl = 0 + i = 0 + while fl != 1 and i < len(punct_marks): + if punct_marks[i] in key: + fl = 1 + i += 1 + if fl == 1: + num += d_files[key] + return num +def output(num, d_files, d_folders): + + print('Количество файлов, названия которых содержит знаки препинания = ', num) + print('Названия файлов и папок в данной папке следующие:') + i = 1 + for key in d_files: + print('%s) %s' % (str(i), str(key))) + i += 1 + for key in d_folders: + if key not in d_files: + print('{}) {}'.format(str(i), str(key))) + i += 1 +def main(): + d_files, folders = files_and_folders() + num = counting(d_files) + output(num, d_files, folders) +if __name__ == '__main__': + main() + +import os +def walking(): + num = 0 + for root, dirs, files in os.walk('.'): + d_files = {} + flag = False + for file in files: + file_name, file_ext = os.path.splitext(file) + if file_ext not in d_files: + d_files[file_ext] = 1 + else: + flag = True + break + if flag: + num += 1 + return num +def main(): + num = walking() + print('Количество папок, в которых встречаются несколько файлов с одним\ +и тем же расширением = {}.'.format(num)) +if __name__ == '__main__': + main() + +import os +def walking(): + num = 0 + for root, dirs, files in os.walk('.'): + d_files = {} + flag = False + for file in files: + file_name, file_ext = os.path.splitext(file) + if file_ext not in d_files: + d_files[file_ext] = 1 + else: + flag = True + print(file) + break + if not flag: + num += 1 + return num +def main(): + num = walking() + print('Количество папок, в которых встречаются несколько файлов с одним\ +и тем же расширением = {}.'.format(num)) +if __name__ == '__main__': + main() +import re +def reading(name): + f = open(name, 'r', encoding = 
'utf-8') + words = f.read().split(' ') + f.close() + return words +def cleaning(words): + for i, word in enumerate(words) : + words[i] = word.lower().strip('.,/1234567890@ + return words +def printing(words): + l = [] + for word in words: + if re.search('кот', word) and word not in l: + l.append(word) + print(word) +def main(): + words = reading(input('Введите, пожалуйста, название файла:\n')) + words = cleaning(words) + printing(words) +if __name__ == '__main__': + main() + +n = float(input('Введите любое число\n')) +print('число | ', n) +for i in range(9 + len(str(round(n * 10, 3)))) : + print('-', end = '') +print() +for i in range(1,11) : + if i != 10 : + print(i, ' | ', round(i * n, 3), end = '\n') + else : + print(i, ' | ', round(i * n, 3), end = '\n') + +n = float(input('Введите любое число\n')) +for i in range(1,11) : + print(i, '*', n, '=', i * n, end = '\n') +import re +def reading(name): + f = open(name, 'r', encoding = 'utf-8') + lines = f.readlines() + f.close() + return lines +def array(lines): + text = ''.join(lines) + text = re.sub('((.|\n)*)', '\\1', text) + text = re.sub('<[wc](.*?)>(.*?)', '\\1 \\2', text) + print(text) + l = re.findall('lemma="(.*?)" type="(.*?)" (.*)', text) + return l +def recording1(d, n): + f = open(input('Введите, пожалуйста, название выходного файла\n'), 'a', encoding = 'utf-8') + f.write(str(n) + '\n') + for key in d.keys(): + f.write(key + '\n') + f.close() +def recording2(d): + f = open(input('Введите, пожалуйста, название выходного файла\n'), 'a', encoding = 'utf-8') + + for key, value in d.items(): + if re.search('l.f.*', key): + f.write(key + ' - ' + str(value) + '\n') + f.close() +def recording3(l): + name = input('Введите, пожалуйста, название выходного файла в формате csv\n') + + while not name.endswith('.csv'): + name = input('Введите, пожалуйста, название выходного файла в формате csv\n') + f = open(name, 'a', encoding = 'utf-8') + for i, elem in enumerate(l): + f.write(elem[0] + ',' + elem[1] + ',' + elem[2] + 
'\n') + f.close() +def dictionary(lines): + d = {} + for line in lines: + r = re.search('lemma=".*" type="(.*)"', line) + if r: + key = r.group(1) + if key in d: + d[key] += 1 + else: + d[key] = 1 + return d +def main(): + name = input('Введите, пожалуйста, название входного файла\n') + lines = reading(name) + n = len(lines) + d = dictionary(lines) + recording1(d, n) + recording2(d) + l = array(lines) + recording3(l) +if __name__== '__main__': + main() +import random + +def noun() : + f = open('nouns.txt', 'r', encoding = 'utf-8') + nouns = f.read().split() + f.close() + return random.choice(nouns) + +def personal_pronoun() : + f = open('personal_pronouns.txt', 'r', encoding = 'utf-8') + pronouns = f.read().split() + f.close() + return random.choice(pronouns) + +def adjective_before_noun() : + f = open('adjectives_before_noun.txt', 'r', encoding = 'utf-8') + adj = f.read().split() + f.close() + return random.choice(adj) + +def adjective_after_noun() : + f = open('adjectives_after_noun.txt', 'r', encoding = 'utf-8') + adj = f.read().split() + f.close() + return random.choice(adj) + +def adverb() : + f = open('adverbs.txt', 'r', encoding = 'utf-8') + adverbs = f.read().split() + f.close() + return random.choice(adverbs) + +def intensifier(adv): + f = open('intensifiers.txt', 'r', encoding = 'utf-8') + intensifiers = f.read().split() + f.close() + return random.choice(intensifiers) + ' ' + adv + +def transitive_infinitive() : + f = open('transitive_infinitives.txt', 'r', encoding = 'utf-8') + inf = f.read().split() + f.close() + return random.choice(inf) + +def intransitive_infinitive() : + f = open('intransitive_infinitives.txt', 'r', encoding = 'utf-8') + inf = f.read().split() + f.close() + return random.choice(inf) + +def temporary_marker() : + f = open('temporary_markers.txt', 'r', encoding = 'utf-8') + temporary_markers = f.read().split() + f.close() + return random.choice(temporary_markers) + +def interrogative() : + f = open('interrogatives.txt', 'r', encoding 
= 'utf-8') + interrogatives = f.read().split() + f.close() + return random.choice(interrogatives) + +def number() : + f = open('numbers.txt', 'r', encoding = 'utf-8') + numbers = f.read().split() + f.close() + return random.choice(numbers) + + +def declension(noun, adjective, number) : + f = open('declension_of_nouns.txt', 'r', encoding = 'utf-8') + g = open('declension_of_adjectives.txt', 'r', encoding = 'utf-8') + nouns = dict() + adjectives = dict() + for line in f.readlines() : + s = line.split(' ', maxsplit = 1) + nouns[s[0]] = s[1].split() + for line in g.readlines() : + s = line.split(' ', maxsplit = 1) + adjectives[s[0]] = s[1].split() + f.close() + g.close() + if nouns[noun][0] == 'm' and number == 'sg' : + return noun, adjective, random.choice(['le', 'un']) + elif nouns[noun][0] == 'm' and number == 'pl' : + return nouns[noun][1], adjectives[adjective][1], random.choice(['les', 'des']) + elif nouns[noun][0] == 'f' and number == 'sg' : + return noun, adjectives[adjective][0], random.choice(['la', 'une']) + elif nouns[noun][0] == 'f' and number == 'pl' : + return nouns[noun][1], adjectives[adjective][2], random.choice(['les', 'des']) + +def collocation_bef(noun, adj_before_noun, article) : + return article + ' ' + adj_before_noun + ' ' + noun + +def collocation_aft(noun, adj_after_noun, article) : + return article + ' ' + noun + ' ' + adj_after_noun + +def conjugation(pronoun, infinitive) : + f = open('conjugations.txt', 'r', encoding = 'utf-8') + verbs = dict() + for line in f.readlines() : + s = line.split(' ', maxsplit = 1) + verbs[s[0]] = s[1].split() + f.close() + if pronoun == 'je' : + return verbs[infinitive][0] + elif pronoun == 'tu' : + return verbs[infinitive][1] + elif pronoun == 'il' or pronoun == 'elle' : + return verbs[infinitive][2] + elif pronoun == 'nous' : + return verbs[infinitive][3] + elif pronoun == 'vous' : + return verbs[infinitive][4] + else : + return verbs[infinitive][5] + +def affirmative_sequence(pronoun, verb) : + if verb[0] in 
'aàâeéèêiîoôuùûy' and pronoun == 'je' : + return "j'" + verb + else : + return pronoun + ' ' + verb + +def interrogative_sequence(pronoun, verb) : + if verb[len(verb) - 1] in 'aàâeéèêiîoôuùûy' and pronoun[0] in 'aàâeéèêiîoôuùûy' : + return verb + '-t-' + pronoun + else : + return verb + '-' + pronoun + +def affirmative_sentence() : + pron = personal_pronoun() + noun1, adj1, art1 = declension(noun(), adjective_before_noun(), 'sg') + noun2, adj2, art2 = declension(noun(), adjective_before_noun(), 'pl') + return affirmative_sequence(pron, conjugation(pron, transitive_infinitive())) + ' ' + collocation_bef(noun1, adj1, art1) + ' et ' + number() + ' ' + adj2 + ' ' + noun2 + '.' + +def interrogative_sentence() : + pron = personal_pronoun() + return interrogative() + ' ' + interrogative_sequence(pron, conjugation(pron, intransitive_infinitive())) + ' ' + temporary_marker() + '?' + +def negative_sentence() : + noun1, adj1, art1 = declension(noun(), adjective_before_noun(), 'pl') + noun2, adj2, art2 = declension(noun(), adjective_before_noun(), 'sg') + return collocation_aft(noun1, adj1, art1) + ' ne ' + conjugation('elle', transitive_infinitive()) + ' pas ' + collocation_bef(noun2, adj2, art2) + ' ' + temporary_marker() + ' ' + intensifier(adverb()) + '.' + +def conditional_sentence() : + return '[Здесь должно быть условное предложение, но я пока не представляю, как оно устроено во французском :( ].' + +def imperative_sentence() : + return 'ne ' + conjugation('vous', intransitive_infinitive()) + ' pas' +'!' 
+ +def random_sentence(n) : + if n == 1 : + return affirmative_sentence() + elif n == 2 : + return interrogative_sentence() + elif n == 3 : + return negative_sentence() + elif n == 4 : + return conditional_sentence() + else : + return imperative_sentence() + +def text_print() : + a = set('12345') + for n in a : + print(random_sentence(int(n)).capitalize(), end = ' ') + +text_print() + +import re + + + + + + + + + + + + + + + + + + +def reading(name): + f = open(name, 'r', encoding = 'utf-8') + words = f.read().replace('\n', ' ').split() + f.close() + return words + +def cleaning(words): + for i, word in enumerate(words) : + words[i] = word.lower().strip('.,/1234567890@ + return words + +def printing(words): + for word in words: + if re.search('загру(з(ят(ся)?|и(шь(ся)?|(сь)?|м(ся)?|л((ся)?|а(сь)?|и(сь)?|о(сь)?)|т((ся)?|е(сь)?|ь(ся)?)|в(ш(ую(ся)?|ая(ся)?|е(го(ся)?|му?(ся)?|й(ся)?|е(ся)?|ю(ся)?)|и((сь)?|й(ся)?|м(и)?(ся)?|е(ся)?|х(ся)?)))?))|ж(у(сь)?|ен(а|о|ы)?|ён|(е|ё)нн(ая|ую|о(м(у)?|ю|е|го|й)|ы(м(и)?|й|е|х))))$', word): + print(word) +def main(): + words = cleaning(reading(input('Введите, пожалуйста, название файла:\n'))) + printing(words) +main() + +import re +def reading(name): + f = open(name, 'r', encoding = 'utf-8') + text = f.read() + f.close() + return text +def find(text): + r = re.search('\= 0 : + bigw += 1 + else : + j = 0 + while j < len(l[i]) and alph.find(l[i][j]) == -1 : + j += 1 + if j == len(l[i]) : + allw -= 1 + elif alphUP.find(l[i][j]) >= 0 : + bigw += 1 +if allw != 0 : + print('The percentage of words, which start with uppercase equals to ', round(bigw / allw * 100, 3), '%', sep = '') +else : + if fl == 0 : + print('There are no words at all! Try to use another file.') + else : + print('There are some symbols, but no words in Russian. 
Try to use another file!') +f.close() + + + +fl = 0 +while fl != 1 : + word = input('Please input one word:\n') + ind = word.find(' ') + if ind == -1 : + fl = 1 + else : + if ind == 0 : + word = word[1:] + flag = word.find(' ') + while flag == 0 : + word = word[1:] + flag = word.find(' ') + if flag > 0 : + subword = word[flag:] + ind = subword.find(' ') + while ind == 0 : + subword = subword[1:] + ind = subword.find(' ') + if subword != '' : + print('There is more than one word. Please try again!') + else : + word = word[:flag] + fl = 1 + else : + if word != '' : + fl = 1 + else : + print("You didn't type any word! Please try again!") + else : + subword = word[ind:] + flag = subword.find(' ') + while flag == 0 : + subword = subword[1:] + flag = subword.find(' ') + if subword != '' : + print('There is more than one word. Please try again!') + else : + word = word[:ind] + fl = 1 +for i in range(len(word)) : + print(word[i:]) + +fl = 0 +while fl != 1 : + word = input('Please input one word:\n') + word = word.strip() + ind = word.find(' ') + if ind == -1 : + if word != '' : + fl = 1 + else : + print("You didn't type any word! Please try again") + else : + print('There is more than one word. Please try again!') +for i in range(len(word)) : + print(word[i:]) + + + +a = float(input('enter the first number\n')) +b = float(input('enter the second number\n')) +c = float(input('enter the third number\n')) +if b == 0. : + print('you can\'t divide by zero') +elif a % b == c and a / b == c : + print('YES') +else : + print('NO') + + +a = float(input('enter the first number\n')) +b = float(input('enter the second number\n')) +c = float(input('enter the third number\n')) +if b == 0. 
: + print('you can\'t divide by zero') +else : + if a % b == c : + print('YES, a % b == c') + else : + print('NO, a % b != c') + if a / b == c : + print('YES, a / b == c') + else : + print('NO, a / b != c') + + +a = int(input('enter the first number\n')) +b = int(input('enter the second number\n')) +c = int(input('enter the third number\n')) +if b == 0 : + print('you can\'t divide by zero') +elif a % b == c and a / b == c : + print('YES') +else : + print('NO') + + +a = int(input('enter the first number\n')) +b = int(input('enter the second number\n')) +c = int(input('enter the third number\n')) +if b == 0 : + print('you can\'t divide by zero') +else : + if a % b == c : + print('YES, a % b == c') + else : + print('NO, a % b != c') + if a / b == c : + print('YES, a / b == c') + else : + print('NO, a / b != c') +import re +import os +import csv +def printing(d1, d2, arr): + f = open('output1.txt', 'w', encoding = 'cp1251') + for key, value in sorted(d1.items()): + f.write(key + '\t' + str(value) + '\n') + f.close() + with open('output2.csv', 'w', encoding = 'cp1251') as csv_file: + writer = csv.writer(csv_file, delimiter = ';') + writer.writerow(['Название файла', 'Автор', 'Дата создания текста']) + for key, value in sorted(d2.items()): + lst = [str(key), str(value[0]), str(value[1])] + writer.writerow(lst) + f = open('output3.txt', 'w', encoding = 'cp1251') + for elem in arr: + f.write(elem + '\n') + f.close() +def dictionary(name): + d1 = {} + d2 = {} + arr = [] + for file in os.listdir(name): + with open(os.path.join(name, file), 'r', encoding = 'cp1251') as text: + text = text.read() + a = re.findall('(.*?)', text) + d1[file] = len(a) + b = re.findall('<.*?>(.*?)([\s,.!123456790:;?""])', text) + words = [words_punct[i][0] for i in range(len(words_punct))] + puncts = [words_punct[i][1] for i in range(len(words_punct))] + d = re.findall('gr="A.*?gen.*?>(.*?)\s.*?gr="S.*?gen.*?>(.*?)', text) + e = [] + for i, elem in enumerate(d): + ind1 = words.index(elem[0]) + ind2 
= words.index(elem[1]) + if ind2 - ind1 == 1: + t = ind1 - 1 + while t >= 0 and puncts[t] not in '[.?!]': + t -= 1 + k = ind2 + while k <= len(words) - 1 and puncts[k] not in '[.?!]': + k += 1 + s = '' + for p in range(t + 1, k): + if p != ind1 and p != ind2: + s += words[p] + puncts[p] + elif p == ind1: + s += '\t' + words[p] + puncts[p] + else: + s += words[p] + puncts[p] + 't' + e.append(s) + arr.extend(e) + return d1, d2, arr +def main(): + d1, d2, arr = dictionary('news') + printing(d1, d2, arr) +if __name__ == '__main__': + main() + +import re +def reading(): + f = open('input.txt', 'r', encoding = 'utf-8') + + text = f.read() + f.close() + text = text.replace('...', '.') + + text = text.replace('—', '') + + + text = re.sub('[\.!\?]([а-яa-z])', ' \\1', text) + text = re.sub('[\.!\?]\)?»? ?«?\(?([а-яa-z])', ' \\1', text) + + + text = re.sub('\.([A-ZА-Я])', ' \\1', text) + + text = re.sub('([A-ZА-Я])\. ([A-ZА-Я])', '\\1 \\2', text) + + + + sentences = re.split(r'[.!?]', text) + + + sentences = [' '.join([word.strip('» «\n:<>\'"@ + + return sentences +def output(sentences): + f = open('output.txt', 'a', encoding = 'utf-8') + for sentence in sentences: + if len(sentence.split()) > 10: + s = 0 + for word in sentence.split(): + s += len(word) + f.write('"{}": это предложение со словами длины {:.1f}\n'.format(sentence, s/len(sentence.split()))) + f.close() +def main(): + sentences = reading() + output(sentences) +if __name__ == '__main__': + main() +print('Введите 7 целых чисел') +arr = [] +for i in range(1, 8) : + print('Введите ', i, '-ое целое число', sep = '') + arr.append(int(input())) +for i in range(7) : + for j in range(arr[i]) : + print('X', end = '') + print() + +import re +def reading(name): + f = open(name, 'r', encoding = 'utf-8') + text = f.read() + f.close() + return text +def correction(text): + corrected_text = re.sub('(Ф|ф)инлянди( |я(х|(ми?))?|и|й|ю|е(й|ю))', '\\1@алайзи\\2', text) + corrected_text = re.sub('ФИНЛЯНДИ( |Я(Х|(МИ?))?|И|Й|Ю|Е(Й|Ю))', 
'МАЛАЙЗИ\\1', corrected_text) + corrected_text = corrected_text.replace('Ф@', 'М') + corrected_text = corrected_text.replace('ф@', 'м') + return corrected_text +def recording(text): + f = open(input('Введите, пожалуйста, название файла вывода:\n'), 'w', encoding = 'utf-8') + f.write(text) + f.close() +def main(): + text = reading(input('Введите, пожалуйста, название файла ввода:\n')) + corrected_text = correction(text) + recording(corrected_text) +if __name__ == '__main__': + main() +import re +def open_text(way_to_file): + with open(way_to_file, 'r', encoding = 'utf-8') as f: + text = f.read() + return text +def search(text): + m = re.findall('\\bдинозавр(a(ми|х)?|у|о(м|в)|е|ы)', text) + return m +def tags(text): + m = re.sub('<.*?>', '', text, flags = re.DOTALL) + return m +def replace(text): + a = re.sub('\\bдинозавр', 'кот', text, flags = re.DOTALL) + return a +def images(text): + n = re.sub('(а|е|ё|и|оуэюя)') +fname = input() +txt = open_text(fname) +res = replace(txt) +print(res) +import re +import os +def auth(direct): + d = {} + for root, dirs, files in os.walk(direct): + for file in files: + with open(os.path.join(direct, file)) as f: + text = f.read() + regex1 = 'content=".*" name="author"' + a = re.findall(regex1, text) + for elem in a: + b = re.sub('content="', '', elem) + c = re.sub('" name="author"', '', b) + d[file] = c + return d +def topic(direct): + d = {} + for root, dirs, files in os.walk(direct): + for file in files: + with open(os.path.join(direct, file)) as f: + text = f.read() + regex1 = 'content=".*" name="topic"' + a = re.findall(regex1, text) + for elem in a: + b = re.sub('content="', '', elem) + c = re.sub('" name="topic"', '', b) + d[file] = c + return d +def main(): + direct = './news' + d1 = auth(direct) + d2 = topic(direct) + with open('./15.csv', 'w', encoding='utf-8') as f: + for key in d1.keys(): + f.write('\n{}'.format(key)) + f.write(' {} '.format(d1[key])) + f.write('{}'.format(d2[key])) +main() +def open_text(way_to_file): + 
with open(way_to_file, 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for index, elem in enumerate(arr): + arr[index] = elem.strip(',.;:!?\n ') + return arr +def first_letter(letter, way_to_file): + arr = open_text(way_to_file) + array = [] + for elem in arr: + if elem[0] == letter: + array.append(elem) + return array +def questions(): + letter = input() + fname = input() + result = first_letter(letter, fname) + return result +result = questions() +print(result) +import re +import os +def number_sent(direct): + d = {} + for root, dirs, files in os.walk(direct): + for file in files: + with open(os.path.join(direct, file)) as f: + text = f.read() + regex = '' + arr = re.findall(regex, text) + d[file]=len(arr) + return d +def main(): + direct = './news' + d = number_sent(direct) + with open('./11.txt', 'w', encoding='utf-8') as f: + for key in d.keys(): + f.write('\n{} {}'.format(key, d[key])) +main() + +import random + +def imperative(): + + + imperative = ["прокати", "уходи", "не спеши", "погоди", "подожди", "позвони", "убегай", "не плошай", "подержи"] + return random.choice(imperative) +def verb(): + + + plural_verbs = ["привезут", "принесли", "принесут", "пожуют", "погрызут", "приплетут", "приведут", "привели"] + return random.choice(plural_verbs) +def noun_phrase(): + + + clitics = ["по", "ни", "на", "хоть", "лишь", "вот", "не", "от", "за", "пусть"] + clitic = random.choice(clitics) + + words2 = ["себе", "тебе", "земля", "игра", "звезда", "мороз", "ответ", "превед", "футбол", "печаль", "бокал"] + noun = random.choice(words2) + return clitic + ' ' + noun +def noun(number): + + + singular_nouns = ["монолог", "коридор", "почему", "потому", "отчего", "каратэ", "кабарэ", "курага", "кандидат"] + plural_nouns = ["малыши", "рукава", "камыши", "табуны", "рюкзаки", "пиджаки", "пацаны", "чуваки"] + + if number == 's': + return random.choice(singular_nouns) + + return random.choice(plural_nouns) +def punctuation(): + + marks = 
[".", "?", "!", "..."] + return random.choice(marks) +def verse1(): + + + return noun('pl') + ' ' + verb() + ' ' + noun('pl') + punctuation() +def verse2(): + + + return imperative() + ' ' + noun('s') + ' ' + noun_phrase() + punctuation() +def verse3(): + + + return noun_phrase() + ' ' + verb() + ' ' + noun('pl') + punctuation() +def make_verse(): + + verse = random.choice([1,2,3]) + if verse == 1: + return verse1() + elif verse == 2: + return verse2() + else: + return verse3() + + +for n in range(4): + print(make_verse()) + +word=input() +for index, elem in enumerate(word): + if (index + 1) % 2 ==1: + if elem in 'пое': + print(elem) + else: + continue + else: + continue +message=input('Введите слово или сообщение: ') +result='' +for letter in message: + result += letter + print(result) +import re +def open_text(way_to_file): + with open(way_to_file, 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for index, elem in enumerate(arr): + arr[index] = elem.strip(',.;:!?\n ') + return arr +def main(): + reglex = 'на(й(ти|д(я|ут?|((е|ё)(шь|т|м|те)|ен(а|о|ы)?)))|ш((е|ё)л|л(а|о|и))|шедш(е(е|й|го|му?)|ая|ую|и(й|е|х|ми?))|йденн(о(е|го|ому?)|ая|ой|ую|ы(й|е|х|ми?))(с(ь|я))?)' + fname = input() + arr = open_text(fname) + array = [] + for elem in arr: + m = re.search(reglex, elem) + if m != None: + if elem not in array: + array.append(elem) + return array +result = main() +print(result) + +import random +def organising_array(way_to_file): + f = open(way_to_file, 'r', encoding = 'utf-8') + file = f.read() + arr = file.split('\n') + return arr +def noun_phrase(): + adjectives = organising_array('./1.txt') + adjective = random.choice(adjectives) + nouns = organising_array('./2.txt') + noun = random.choice(nouns) + return adjective + ' ' + noun +def clause(): + clauses = organising_array('./3.txt') + return random.choice(clauses) +def adverb(): + adverbs = organising_array('./4.txt') + return random.choice(adverbs) +def clause2(): + 
clitics = organising_array('./5.txt') + clitic = random.choice(clitics) + pronouns = organising_array('./6.txt') + pronoun = random.choice(pronouns) + verbs = organising_array('./7.txt') + verb = random.choice(verbs) + return clitic + ' ' + pronoun + ' ' + verb +def objects(): + objects = organising_array('./8.txt') + return random.choice(objects) +def patient(): + patients = organising_array('./9.txt') + return random.choice(patients) +def verb(): + verbs = organising_array('./10.txt') + return random.choice(verbs) +def praep_phrase(): + praeps = organising_array('./11.txt') + praep = random.choice(praeps) + nouns = organising_array('./12.txt') + noun = random.choice(nouns) + return praep + ' ' + noun +def adjective(): + adjectives = organising_array('./13.txt') + return random.choice(adjectives) +def punctuation(): + marks = organising_array('./14.txt') + return random.choice(marks) +def verse1(): + return noun_phrase()+ punctuation() + ' ' + clause() + punctuation() +def verse2(): + return adverb() + ' ' + clause2() + ' ' + objects() + punctuation() +def verse3(): + return patient() + ' ' + verb() + ' ' + praep_phrase() + ' ' + adjective() + punctuation() +def make_verse(): + verse = random.choice([1,2,3]) + if verse == 1: + return verse1() + elif verse == 2: + return verse2() + else: + return verse3() +for n in range(4): + print(make_verse()) +import re +def open_text(way): + with open(way, 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.replace(',', '') + text = text.replace('.', '') + text = text.replace(':', '') + text = text.replace(';', '') + text = text.replace('!', '') + text = text.replace('?', '') + text = text.replace('-', '') + text = text.replace('"', '') + text = text.replace('(', '') + text = text.replace(')', '') + return text +def search(text): + regex = '\\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+аго\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b' + m = re.findall(regex, text, flags = re.DOTALL) + return m +def write(fname, m): + 
with open(fname, 'a', encoding = 'utf-8') as f: + for elem in m: + f.write(elem) + f.write('\n') +way = input() +fname = input() +text = open_text(way) +m = search(text) +write(fname, m) +def open_text(way): + with open(way, 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for index, elem in enumerate(arr): + arr[index] = elem.strip(',.;:!?-') + return arr +def main(): + fname = input() + arr = open_text(fname) + n = len(arr) + return n +res = main() +print('В файле содержится ', res, ' слов') +def open_text(way): + with open(way, 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for index, elem in enumerate(arr): + arr[index] = elem.strip(',.;:!?-"') + return arr +def freq_list(arr): + d = {} + for elem in arr: + if elem not in d.keys(): + d[elem] = 1 + else: + d[elem] = d[elem] + 1 + return d +def sort(d): + array = [] + for elem in d.keys(): + array.append(elem) + arr = [] + for i in range(len(array)): + temp = array[i] + for index, elem in enumerate(array): + if elem < temp: + t = temp + temp = elem + array[index] = t + if temp not in arr: + arr.append(temp) + return arr +def write(fname, d, arr): + with open(fname, 'a', encoding = 'utf-8') as f: + for elem in arr: + f.write(elem) + f.write(',') + f.write(str(d[elem])) + f.write('\n') +fname = input() +fname2 = input() +arr = open_text(fname) +d = freq_list(arr) +a = sort(d) +write(fname2, d, a) +import re +def open_text(way): + with open(way, 'r', encoding = 'utf-8') as f: + text = f.read() + text = re.sub(',.:;!?-"()\n', '', text) + arr = text.split() + return arr +def search(arr): + result = [] + regex = '\\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+аго\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b' + for i in range(len(arr) - 7): + current = ' '.join(arr[i:i+7]) + m = re.search(regex, current) + if m is not None: + result.append(current) + return result +def write(fname, m): + with open(fname, 'a', encoding = 'utf-8') as 
f: + for elem in m: + f.write(elem) + f.write('\n') +way = input() +fname = input() +text = open_text(way) +m = search(text) +write(fname, m) +import random +def opening_csv(way_to_file): + with open(way_to_file, 'r', encoding = 'utf-8') as f: + text = f.read() + arr = text.split('\n') + d = {} + for elem in arr: + array = elem.split(',') + d[array[0]] = array[1] + return d +def random_key(d): + array = [] + for elem in d.keys(): + array.append(elem) + word = random.choice(array) + return word +def main(): + way_to_file = input() + vocabul = opening_csv(way_to_file) + word = random_key(vocabul) + print(word) + for i in range(len(vocabul[word])): + print('_', end = ' ') + solve = input() + if solve == vocabul[word]: + result = 'WIN!!!' + else: + result = 'FAIL(((' + return result +res = main() +print(res) +import re +def open_text(way_to_file): + with open(way_to_file, 'r', encoding = 'utf-8') as f: + text = f.read() + return text +def search(text): + text1 = re.sub('<.*?>', '', text, flags = re.DOTALL) + text2 = re.sub('\n', '', text1, flags = re.DOTALL) + m = re.findall('Часовой поясUTC.?[0-9]', text2) + return m +def write(arr, way_to_file2): + with open(way_to_file2, 'a', encoding = 'utf-8') as f: + for elem in arr: + newtext = f.write(elem) + return newtext +def main(): + fname1 = input() + fname2 = input() + t = open_text(fname1) + txt = search(t) + res = write(txt, fname2) + return res +a = main() +import os +import re +def no_numbers(): + num = '(1|2|3|4|5|6|7|8|9|0)' + file = '\.' 
+ a = [] + for elem in os.listdir('.'): + res = re.search(num, elem) + if res == None: + result = re.search(file, elem) + if result: + a.append(elem) + n = len(a) + return n +def no_repet(): + arr = [] + for elem in os.listdir('.'): + a = re.sub('\..*', '', elem) + if a not in arr: + arr.append(a) + return arr +print('Количество файлов без цифр в названии равно', no_numbers()) +print('Найдены следующие файлы и папки (без повторов):', no_repet()) +a=[] +word=input() +while word: + a.append(word) + word=input() +for el in a: + if len(el)>5: + print(el) + else: + continue +import re +def open_text(way_to_file): + with open(way_to_file, 'r', encoding = 'utf-8') as f: + text = f.read() + return text +def replace1(txt): + m = re.sub(r'\bвикинг(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'\bбурундук\1', txt, flags = re.DOTALL) + return m +def replace2(txt): + n = re.sub(r'\bВикинг(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'\bБурундук\1', txt, flags = re.DOTALL) + return n +def write(txt, way_to_file2): + with open(way_to_file2, 'w', encoding = 'utf-8') as f: + newtext = f.write(txt) + return newtext +def main(): + fname1 = input() + fname2 = input() + txt = open_text(fname1) + r = replace1(txt) + res = replace2(r) + result = write(res, fname2) + return result +a = main() +import os +import re +def dir_choose_kyr(dir_name): + arr = [] + regex ='[А-Я|Ё|а-я|ё| ]*' + for root, dirs, files in os.walk(dir_name): + for elem in dirs: + r = re.sub(regex, '', elem) + if r == '': + arr.append(elem) + print(arr) + n = len(arr) + return n +def main(): + dir_name = '.' 
+ n = dir_choose_kyr(dir_name) + print('В папке найдено ', n, ' папок с полностью кириллическими названиями (допускаются пробелы между словами)') + return n +main() +way = input('Введите путь к файлу без дополнительных символов: ', ) +f = open(way, 'r', encoding = 'utf-8') +text = f.read() +f.close() +min = len(text) +max = 0 +arr = text.split('\n') +for el in arr: + if len(el) > max: + max = len(el) + if len(el) < min: + min = len(el) +k = max/min +print('Самая короткая строка короче самой длинной в ', k, ' раз(а)') +def open_text(way_to_file): + with open(way_to_file, 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.lower() + arr = text.split() + for index, elem in enumerate(arr): + arr[index] = elem.strip(',.;:!?\n ') + return arr +def finding_suffix(suffix, way_to_file): + arr = open_text(way_to_file) + array = [] + for elem in arr: + a = len(elem) - len(suffix) + b = len(elem) + if elem[a:b] == suffix: + array.append(elem) + return array +def one_word_once(array): + arr = [] + for elem in array: + if elem not in arr: + arr.append(elem) + return arr +def func(array): + temp = [] + arr = [] + for elem in array: + if elem not in temp: + temp.append(elem) + else: + arr.append(elem) + return arr +def count_freq(array): + result = array + for i in range(len(array)): + temp = func(result) + if len(temp)==0: + break + else: + result = temp + return result +fname = input() +suffix = 'ness' +arr = finding_suffix(suffix, fname) +array = one_word_once(arr) +print('В тексте имеются следующие слова с суффиксом ', suffix, ':') +for elem in array: + print(elem) +max_freq = count_freq(arr) +print('Макс. 
частоту имеет(-ют) слово(-а):', max_freq) +import os,re +def counting_sentences(file): + sentences = re.findall('', file) + return len(sentences) +def opening_folders(folder): + path = folder + dic = {} + for file in os.listdir(folder): + with open ((os.path.join(folder, file))) as f: + text = f.read() + number = int(counting_sentences(text)) + dic[file] = number + return dic +def writing_table(dic): + with open ('number_of-sentences.txt', 'w', encoding = 'utf-8') as f: + for file in dic: + f.writelines(file + '\t' + str(dic[file]) + '\n') +def author_and_topic(folder): + path = folder + for file in os.listdir(folder): + with open ((os.path.join(folder, file))) as f: + text = f.read() + reg1 = '(content="(.*)" name="author")' + reg2 = '(content="(.*)" name="topic")' + for i in range (1): + for i in re.findall(reg1, text): + author = i[1] + for i in re.findall(reg2, text): + topic = i[1] +writing_table(opening_folders(r'C:\Users/student/Desktop/news/')) +author_and_topic(r'C:\Users/student/Desktop/news/') + +import os +number = 0 +for roots, dirs, files in os.walk('.'): + names = [] + for f in files: + name = f[::-1].split('.')[0] + if name not in names: + names.append(name) + else: + number += 1 + break +print(number) +import re +m1 = 'загруж(у|(енн?(ы(й|е|х|ми?)?|о(го|му?|е|й)?|ую|а)?))(с(я|ь))?' +m2 = 'загруз(и(т|шь|м|т(ь|е)?|л(а|о|и)?)?(в(ш(и(й|ми?|х)|е(го|му?|е|й)))?)?|ят)(с(я|ь))?' 
+with open (r"C:\Users\Анна\Documents\GitHub\prog\PythonHW9\re.txt",'r', encoding='utf-8') as f: + mas = [] + for line in f: + words = line.split() + for word in words: + word = word.strip(',.;"()-!?') + mas.append(word.lower()) +arr = [] +for i in mas: + a = re.search(m1,i) + b = re.search(m2,i) + if a != None and len(a.group()) == len(i): + if a.group() not in arr: + arr.append(a.group()) + print(a.group()) + if b != None and len(b.group()) == len(i): + if b.group() not in arr: + arr.append(b.group()) + print(b.group()) + +i=1 +int(i) +print ('Введите любое число') +a=int(input()) +while i<=10: + print(i,'*',a,'=',(i*a),';') + i += 1 +else: + print ('Цикл завершен') +def opening(name): + with open (name, 'r', encoding = 'utf-8') as f: + count = 0 + for line in f: + if line !=' \n': + count += 1 + else: + break + return count +def writing(): + count = opening(name) + with open('2.txt', 'w', encoding = 'utf-8') as f: + f.write(str(count)) +name = r'C:\Users\student\Desktop\1.xml' +writing() +import re +with open (name, 'r', encoding = 'utf-8') as f: + content = f.read() + arr = re.findall(r'(.*?)', content) + d = {} + for i in arr: + d[i[1]] = content.count(i[1]) +with open ('3.txt', 'w', encoding = 'utf-8') as f: + for key in d: + a = str(key) + ' ' + str(d[key]) + '\n' + f.write(a) + + + +def reading(): + with open (r"C:\Users\Анна\Documents\ФиКЛ\PythonHW7\omni.txt", 'r', encoding='utf8') as text: + mas = [] + for line in text: + words = line.split() + for word in words: + word = word.strip(',.;"()-!?') + mas.append(word.lower()) + return(mas) +def omni_counting(): + s = 0 + omni = [] + for word in reading(): + if word[:4] == 'omni': + s += 1 + if word not in omni: + omni.append(word) + print (s,'words with OMNI-') + p = 0 + for word in omni: + without_omni = [] + w2 = word[4:] + if w2 not in without_omni: + without_omni.append(w2) + p += int(reading().count(w2)) + print(p, 'words without OMNI-') +omni_counting() + +import os, re +def folder_opening(big_folder): 
+ names = [item for item in os.listdir(big_folder) if os.path.isfile(item) and re.search('[^.]*\..*?[,._?<>''""!-()].*?',str(item)[::-1])] + return len(names) +print('Найдено',folder_opening('.'), 'файлов, название которых содержит знаки препинания') +def all_files(big_folder): + files = [item[::-1] for item in os.listdir(big_folder) if os.path.isfile(item)] + all_files = [] + for item in files: + all_files.append((re.sub(u'([^.]*\.)?(.*)', u'\\2', str(item))[::-1])) + for item in os.listdir(big_folder): + if os.path.isdir(item): + all_files.append(item) + all_files_new = [] + for item in all_files: + if item not in all_files_new: + all_files_new.append(item) + + return all_files_new +print('Все файлы:', all_files('.')) + +total = 0 +upletters = 0 +with open(r'C:\Users\Анна\Documents\GitHub\prog\PythonHW5\text.txt','r',encoding='utf8') as f: + text = f.read() + words = text.split() + for item in words: + total += 1 + for letter in item: + if letter.isupper(): + upletters += 1 + else: + continue +print("Количество слов в тексте: ",total) +print('Количество слов с заглавной буквы',upletters) +print("Процент слов в тексте, начинающихся с заглавной буквы: ", upletters/total,'%') + + + + + + + +arr = [] +num = 1 +while num <=7: + chislo = int(input('Vvedite chisclo')) + if chislo > 0: + arr.append('X'*chislo) + else: + arr.append('') + num += 1 +for i in arr: + print(i) +def opening(file): + with open(file, 'r', encoding = 'utf-8') as f: + f = f.read() + sentences = f.split('.') + mas = [] + for sentence in sentences: + sentence = sentence.split('!') + for i in sentence: + i = i.split('?') + for a in i: + mas.append(a) + return mas +for sentence in opening(r"C:\Users\Анна\Documents\GitHub\prog\PythonHW12\text.txt"): + words = sentence.split() + new_words = [word.strip('.,!?/-;:''""«»—()') for word in words if len(words) > 10] + print(new_words) + lenght = 0 + for word in new_words: + lenght += len(word) + if new_words: + template = 'Это предложение со словами длины 
{:.1f}' + print (template.format(lenght/len(new_words))) +print ('Введите три числа A,B и C') +a = int(input ()) +b = int(input ()) +c = int(input ()) +print ("A =",a) +print ("B =",b) +print ("C =",c) +print ('A+B =',(a+b)) +print ('A*C + B =',(a*c+b)) +if (a+b) == c: + if (a*c)+b==0: + print ('сумма чисел A и B равна С и число С является решением квадратного уравнения ax+b=c') + else: + print ('сумма чисел A и B равна С, но число С не является решением квадратного уравнения ax+b=c') +elif (a*c)+b == 0: + print ('сумма чисел A и B не равна С, но число С является решением квадратного уравнения ax+b=c') +else: + print ('сумма чисел A и B не равна С и число С не является решением квадратного уравнения ax+b=0') + +word = input('Vvedite slovo') +offset = 1 +int(offset) +for offset in range (len(word)): + print (word [offset::]) +with open (r"C:\Users\Анна\Documents\GitHub\prog\PythonHW8\words.csv", 'r', encoding = 'utf-8') as text: + mas = [] + for line in text: + words = line.split(',') + for word in words: + mas.append(word) +words = {} +for i in mas: + word = i.split(';') + words[word[0].strip()] = word[1].strip() +for key in words: + print(key, '...') + p = 3 + for i in range (3): + if input() != words[key]: + p -= 1 + print('Осталось', p, 'попыток') + if p == 0: + print('Вы не угадали слово') + else: + print ('Ура! 
Вы угадали слово!') + break + + + + +import random +def nouns(): + f = open (r'D:\Desktop\Аня\sentence_generator\nouns.txt','r', encoding = 'UTF-8') + a = f.read() + a = a.split() + arr = [] + for w in a: + arr.append(w) + return random.choice(arr) + f.close() +def adjectives(): + f = open (r'D:\Desktop\Аня\sentence_generator\adjectives.txt','r', encoding = 'UTF-8') + a = f.read() + a = a.split() + arr = [] + for w in a: + arr.append(w) + return random.choice(arr) + ' ' + nouns () +def verbs(): + f = open (r'D:\Desktop\Аня\sentence_generator\verbs.txt','r', encoding = 'UTF-8') + a = f.read() + a = a.split() + arr = [] + for w in a: + arr.append(w) + return random.choice(arr) + f.close() +def adverbs(): + f = open (r'D:\Desktop\Аня\sentence_generator\adverbs.txt','r', encoding = 'UTF-8') + a = f.read() + a = a.split() + arr = [] + for w in a: + arr.append(w) + return random.choice(arr) + f.close() +def assertion(): + return(adjectives()) + ' ' + (verbs()) + 't' + ' ' + 'une ' + (adjectives()) + ' ' + (adverbs()) +def sentence(): + return 'La ' + (assertion()) + '.' +def negation(): + return 'La ' + (adjectives()) + ' ' + 'ne' + ' ' + (verbs()) + 't' + ' ' + 'pas ' + 'une ' + (adjectives()) + ' '\ + + (adverbs()) + '.' +def question(): + return 'La ' + (adjectives()) + ' ' + (verbs()) + 't' + '-elle ' + 'une ' + (adjectives()) + ' ' + (adverbs()) + '?' +def conditions (): + return 'Si ' + (assertion()) + ', ' + (assertion()) + '.' 
+def imperative(): + a = str(verbs()) + return (a.capitalize() + 's' + ' ' + 'une ' + (adjectives()) + ' ' + (adverbs()) + '!') +mas = [(sentence()),(negation()),(question()),(conditions ()),(imperative ())] +mass = [] +for i in range (len(mas)): + for item in mas: + randitem = random.choice(mas) + if randitem not in mass: + mass.append(randitem) + print(randitem) +def content(name): + with open (name, 'r', encoding = 'utf-8') as f: + content = f.read() + return content +name = r"C:\Users\Анна\Documents\GitHub\prog\PythonHW10\Squirrels.html" +import re +reg = u'(Отряд:

Научная сфера:", a[i+1]) + if r1 and r2: + r = re.search("(

\ +.*

)", \ + a[i+2]) + break + f.close() + return r +def func2(): + if func1(): + title = func1().group(2) + else: + print ('что-то пошло не так') + return title +f = open("text_wiki.txt", 'w', encoding = "utf-8") +f.write(func2()) +f.close() +f = open("text_wiki.txt", encoding = "utf-8") +a = f.readlines() +for line in a: + print(line) +import os +alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ. ' +def func1(): + number = 0 + arr1 = [] + for i in os.listdir('.'): + if os.path.isfile(i): + j = 0 + check1 = True + check2 = 0 + for j in range(len(i)): + if i[j] not in alphabet: + check1 = False + if i[j] == '.': + check2 += 1 + if check1 == True and check2 <= 1: + number += 1 + arr1.append(i) + print('Найдено файлов, название которых состоит \ +только из латинских символов: ' + str(number)) + return arr1 +def func2(arr): + arr2 = [] + for i in arr: + if i[0:i.find('.')] not in arr2: + arr2.append(i[0:i.find('.')]) + for k in arr2: + print (k) + return +func2(func1()) +import re +def func1(): + f1 = open("Философия -- Википедия.txt", 'r', encoding = "utf-8") + change1 = re.sub('Филос(о́|о)фи(я(х|ми?)?|и|е?й|ю)', 'Астрол\\1ги\\2', f1.read()) + change2 = re.sub('философи(я(х|ми?)?|и|е?й|ю)', 'астрологи\\1', change1) + f1.close() + return change2 +def func2(): + f2 = open("Астрология.txt", 'w', encoding = "utf-8") + f2.write(func1()) + f2.close() + return True +func2() +import re +def func1(): + arr = [] + i = 0 + f = open("Космическая программа Китая.txt", encoding = "utf-8") + a = f.readlines() + for line in a: + arr.append(line) + f.close() + return arr +i = 0 +arr1 = [] +for i in range(len(func1())): + res = re.findall('«[А-Яа-я ]*-[1-9]»', func1()[i]) + j = 0 + for j in range(len(res)): + if res[j] not in arr1: + arr1.append(res[j]) + print (res[j]) +word=input('Введите русское существительное первого склонения') +if word.endswith('а') or word.endswith ('я'): + print ('Именительный падеж, единственное число') +elif word.endswith ('ами') or word.endswith 
('ями'): + print ('Творительный падеж, множественное число') +elif word.endswith('ы') or word.endswith ('и'): + print ('Родительный падеж, единственное число или именительный или винительный падеж, множественное число') +elif word.endswith('е'): + print ('Дательный или предложный падеж, единственное число') +elif word.endswith('ой') or word.endswith ('ою') or word.endswith('ёй') or word.endswith ('ёю')or word.endswith ('ею') or word.endswith ('ей'): + print ('Творительный падеж, единственное число') +elif word.endswith('у') or word.endswith('ю'): + print ('Винительный падеж, единственное число') +elif word.endswith ('ам') or word.endswith ('ям'): + print ('Дательный падеж, множественное число') +elif word.endswith ('ах') or word.endswith('ях'): + print ('Предложный падеж, множественное число') +else: + print ('Родительный или винительный падеж, множественное число') +import re +def func3b(string): + r = re.match('([А-Яа-я][a-я]*)= 10: + print (word, freqdict(func1())[word]) +a=9 +a=int(a) +s=input('Введите число') +s=int(s) +if a==s: + print('Позравляю, вы угадали') +else: + if a>s: + print('Загаданное число больше') + if a.+', line) + w_sum += len(arr1) + arr2 = re.findall('ana', line) + ana_sum += len(arr2) + print (str(ana_sum/w_sum)) + return +func1() +def func1(): + arr = [] + i = 0 + f = open("1.txt", encoding = "utf-8") + a = f.readlines() + for line in a: + words = line.split() + for i in range(len(words)): + words[i] = words[i].lower() + words[i] = words[i].strip('.,!?/\|()";:') + arr.append(words[i]) + f.close() + return arr +def func2(x,arr): + glasnye = 'аяоёуюэеыи' + slova = [] + i = 0 + for i in range(len(arr)): + j = 0 + slogi = 0 + for j in range(len(arr[i])): + if arr[i][j] in glasnye: + slogi += 1 + if slogi == x: + slova.append(arr[i]) + return slova +def func3(bukva,arr): + slova = [] + for word in arr: + if word[0] == bukva: + slova.append(word) + return slova +y = input('Введите букву русского алфавита ') +print(func3(y,func1())) +def func1(): 
+ arr = [] + i = 0 + f = open("1.txt", 'r', encoding = "utf-8") + a = f.readlines() + for line in a: + words = line.split() + for i in range(len(words)): + words[i] = words[i].lower() + arr.append(words[i].strip(',.()«»!')) + f.close() + arr.sort() + return arr +def freqdict(arr): + word_count = {} + for word in arr: + if word not in word_count: + word_count[word] = 1 + else: + word_count[word] += 1 + return word_count +f1 = open("2.tsv", 'w', encoding = "utf-8") +for j in sorted(freqdict((func1()))): + f1.write(j) + f1.write('\t') + f1.write(str(freqdict(func1())[j])) + f1.write('\n') +f1.close() +alphabet = 'abcdefghijklmnopqrstuvwxyzабвгдеёжзийклмнопрстуфхцчшщъыьэюя' +alphabet = list(alphabet) +def freqdict1(arr): + letter_count = {} + for letter in alphabet: + letter_count[letter] = 0 + for word in arr: + if word.startswith(letter): + letter_count[letter] += 1 + return letter_count +f2 = open("3.tsv", 'w', encoding = "utf-8") +for k in sorted(freqdict1(func1())): + f2.write(k) + f2.write('\t') + f2.write(str(freqdict1(func1())[k])) + f2.write('\n') +f2.close() +a=9 +a=int(a) +s=input('Введите число ') +if len(s)==0: + print ('Game over') +s=int(s) +while a!=s: + if a>s: + print('Загаданное число больше') + if a.+", line) + if s1: + if s1.group(1) not in freqdict: + freqdict[s1.group(1)] = 1 + else: + freqdict[s1.group(1)] += 1 + return freqdict +def func2(): + f3 = open("Ключи.txt", 'w', encoding = "utf-8") + for i in func1(): + f3.write(i) + f3.write('\n') + f3.close() + return True +func2() +def func3(): + f4 = open("Прилагательные.txt", 'w', encoding = "utf-8") + for i in func1(): + s2 = re.search("l.f...", i) + if s2: + f4.write(i) + f4.write(' ') + f4.write(str(func1()[i])) + f4.write('\n') + f4.close() + return True +func3() +def func4(): + f5 = open("Внутри тега body.txt", 'r', encoding = "utf-8") + change1 = re.sub("(.+)", "\\1 \\2 \\3", f5.read()) + change2 = re.sub("<.*>", ' ', change1) + f5.close() + return change2 +import os +import re +def 
preprocessing(): + all_meta = [] + w = open('results.txt', 'w', encoding = 'utf-8') + for el in os.listdir('news'): + with open(os.path.join('news',el), 'r', encoding = 'Windows-1251') as f: + article = f.read() + + sentences = re.findall(r'', article) + template = '{} {}\n' + w.write(template.format(el, len(sentences))) + + author = re.findall(r'', article) + authorstr = author[0] + authorstr = re.sub('', '', authorstr) + topic = re.findall(r'', article) + topicstr = topic[0] + topicstr = re.sub('', '', topicstr) + meta = el+','+authorstr+','+topicstr+'\n' + all_meta.append(meta) + + words = [] + wordsraw = re.findall('.+', article) + for el in wordsraw: + wordsrawstr = el + wordsrawstr = re.sub('', '', wordsrawstr) + wordsrawstr = re.sub('', '', wordsrawstr) + wordsrawstr = re.sub('`', '', wordsrawstr) + wordsrawstr = wordsrawstr.lower() + words.append(wordsrawstr) + bigrams = [] + for ind in range(1, len(words) - 1): + bigrams.append(' '.join([words[ind - 1], words[ind]])) + w.close() + return bigrams, all_meta +def data(all_meta): + w = open('metadata.csv', 'w', encoding = 'utf-8') + w.write('Название файла,Автор,Тематика текста\n') + for el in all_meta: + w.write(el) + w.close() +def bigram_processing(bigrams): + w = open('bigrams_res.txt', 'w', encoding = 'utf-8') + for el in bigrams: + if re.match(r'(в|на|о|об|обо|при|по) .+(е|и|ах|ях)', el) != None: + bigram = el + '\n' + w.write(bigram) + w.close() +bigrams, all_meta = preprocessing() +bigram_processing(bigrams) +data(all_meta) +w=input('Введите слово: ') +while w == '': + w=input('Попробуйте еще раз: ') +border = 1 +for i in range (len(w) // 2): + print (w[border:len(w) - border]) + border += 1 +import re +def print_forms(): + with open('rudin.txt', 'r', encoding='utf-8') as f: + text = f.read() + wordarr = text.split() + sit_arr = [] + for word in wordarr: + word.lower() + word.strip(',...!?-–— :,') + t = re.match('си(жу|д(е(ть|в((ши)?й?)?|л(а|о|и)?)|и(те?|м|шь)?|я(т|щий)?))', word) + if (t != None) and 
(word not in sit_arr): + sit_arr.append(word) + for el in sit_arr: + print (el) +print_forms() +import random +def create_dict(): + with open('db.txt', 'r', encoding = 'utf-8') as f: + db = f.read() + phrases = db.split('\n') + clues = dict() + keys = [] + phrase_split = [] + for phrase in phrases: + phrase_split = phrase.split() + clues[phrase_split[len(phrase_split) - 1]] = phrase_split[0:len(phrase_split) - 1] + keys.append(phrase_split[len(phrase_split) - 1]) + return clues, keys +def show(clues, keys, shown): + key = random.choice(keys) + while key in shown: + key = random.choice(keys) + clue_arr = clues[key] + for el in clue_arr: + print(el, end = ' ') + guess = input() + if guess.lower() == key: + check = True + else: + check = False + return check, key +def result(check): + congrats = ['Поздравляю!', 'horoshego dnya!', 'Угадали!', 'Верно!', 'Хорошо сработано!'] + condolences = ['Попробуйте еще раз!', 'Не отчаивайтесь, продолжайте!', 'Почти в точку... у вас есть еще попытка!', 'nichego, zavtra otgadaete!', 'escho chut-chut...'] + if check == True: + print(random.choice(congrats)) + else: + print(random.choice(condolences)) +def run(): + shown = [] + for i in range (10): + clues, keys = create_dict() + check, key = show(clues, keys, shown) + result(check) + if check == True: + shown.append(key) +run() +print('Всего доброго!') +import re +def change(): + with open('aves.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + text = re.sub(r'\bптице.\b', r'рыбо.', text) + text = re.sub(r'\bПтице.\b', r'Рыбо.', text) + text = re.sub(r'\bптиц', r'\bрыб', text) + text = re.sub(r'\bПтиц', r'\bРыб', text) + with open('fish.txt', 'w', encoding = 'utf-8') as f: + f.write(text) + print('Текст записан в файл fish.txt') +change() +import re +def search(): + with open('chuvash.html', 'r', encoding = 'utf-8') as f: + source = f.read() + search_arr = source.split('', el) + codearr = re.split('', el[1]) + result = codearr[0] + return result +def record(result): + with 
open('blank.txt', 'w', encoding = 'utf-8') as f: + f.write(result) + f.close() + print('Трехбуквенный код языка записан в файл blank.txt') +result = search() +record(result) +print('Введите число') +n=float(input()) +while n<2: + print ('Отсутствует степени 2, не превышающие', n, 'Пожалуйста, введите число не меньше двух.') + n=float(input()) +d=2 +print ('Степени числа 2, не превышающие', n, ':', end=' ') +while d<=n: + print(d, end=' ') + d*=2 +def opentext (title): + with open(title, 'r', encoding='utf-8') as f: + text = f.read() + arr = [] + arr = text.split() + for elem in arr: + elem.lower() + elem.strip('!-./?"", ') + return arr +def firstletter(letter, arr): + wordsarr = [] + for elem in arr: + if letter == elem[0:2]: + wordsarr.append(elem) + return wordsarr +def questions(): + file_name = input('Введите путь к файлу: ') + minlen = int(input('Введите минимальную длину слова: ')) + arr = opentext(file_name) + un_words = firstletter('un', arr) + return minlen, un_words +def count(minlen, un_words): + wordslen = [] + n = 0 + for elem in un_words: + for letter in elem: + n += 1 + if n > minlen: + wordslen.append(elem) + n = 0 + print('Количество слов, начинающихся с un:', len(un_words)) + print('Процент слов длинее', minlen, ':', len(wordslen)/len(un_words)*100) +minlen, un_words = questions() +count(minlen, un_words) +import os +import shutil +import re +def countfolders(): + obj = os.listdir() + folders = [el for el in obj if os.path.isdir(el)] + result = [] + for folder in folders: + if r'[a-z]|[A-Z]' and r'[а-яё]|[А-ЯЁ]' in folder: + result.append(folder) + print(folder) + print('Всего папок, удовлетворяющих условию:', len(result)) +countfolders() +import os +def mostfiles(): + number = {root : len(files) for root, dirs, files in os.walk('.')} + c = 0 + folder = '' + for root in number: + if number[root] > c: + c = number[root] + folder = root + print('Количество файлов в папке по адресу', folder, ':', c) +mostfiles() +text = 
open('exomars.txt','r',encoding='utf-8') +arr = [] +countline = 0 +countall = 0 +symb = 0 +for line in text: + countall += 1 + arr = line.split( ) + for el in arr: + if el == '—': + symb += 1 + countwords = len(arr) - symb + if countwords > 5: + countline += 1 +text.close() +print('Всего строк:', countall,'Строк с числом слов больше 5:', countline, 'Процент:', round(countline*100/countall), '%') +arr = [] +arr1 = [] +i = 0 +print('Пожалуйста, введите 8 слов') +while i != 8: + word = input() + arr.append(word) + i += 1 +i = 0 +while i <= 6: + pair = arr[i] + arr[i+1] + arr1.append(pair) + i += 2 +for el in arr1: + print (el) +import random +def adj(): + adj_arr = [] + contadj = open('esenin_adj_pl.txt', 'r', encoding='utf-8') + for line in contadj: + line_lc = line.capitalize().strip() + adj_arr.append(line_lc) + contadj.close() + return random.choice(adj_arr) +def noun(): + noun = [] + contnoun = open('spi_noun_pl.txt', 'r', encoding='utf-8') + for line in contnoun: + line_lc = line.lower().strip() + noun.append(line_lc) + contnoun.close() + return random.choice(noun) +def verb(): + verbs = [] + contverbs = open('majakovsky_verbs.txt', 'r', encoding='utf-8') + for line in contverbs: + line_lc = line.capitalize().strip() + verbs.append(line_lc) + contverbs.close() + return random.choice(verbs) +def adv(): + adv = [] + contadverb = open('pushkin_adverbs.txt', 'r', encoding='utf-8') + for line in contadverb: + line_lc = line.lower().strip() + adv.append(line_lc) + contadverb.close() + return random.choice(adv) +def prop(): + prop = [] + contprop = open('properties.txt', 'r', encoding='utf-8') + for line in contprop: + line_lc = line.lower().strip() + prop.append(line_lc) + contprop.close() + return random.choice(prop) +def line1(): + syll = 0 + while syll != 5: + syll = 0 + first = adj() + ' ' + noun() + for letter in first: + if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя': + syll += 1 + return first +def line2(): + syll = 0 + while syll != 7: + syll = 0 + second = verb() + ' ' 
+ adv() + ' ' + adv() + random.choice(['!','?','.','...']) + for letter in second: + if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя': + syll += 1 + return second +def line4(): + syll = 0 + while syll != 7: + syll = 0 + fourth = verb() + ' ' + adv() + random.choice(['!','?','.','...']) + for letter in fourth: + if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя': + syll += 1 + return fourth +def line5(): + syll = 0 + person=['Я','Ты'] + while syll != 7: + syll = 0 + fifth = random.choice(person) + ' ' + prop() + ' ' + adv() + random.choice(['!','?','.','...']) + for letter in fifth: + if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя': + syll += 1 + return fifth +print(line1()) +print(line2()) +print(line1()) +print(line4()) +print(line5()) +print('Введите три числа') +a,b,c=float(input()), float(input()), float(input()) +div=a/b +deg=a**b +if div==c: + print ('Результат деления А на B равен С') +else: + print ('Результат деления А на B НЕ равен С') +if deg==c: + print ('А в степени B равно С') +else: + print ('А в степени B НЕ равно С') +import re +def preproc(): + with open('text.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + allsent = re.split(r'[\.\?\!]', text) + allsent = [sent.lower() for sent in allsent] + allsent = [re.sub(r'[,—“\':”\(\)]', '', sent) for sent in allsent] + return allsent +def count(sent): + num = {word : sent.count(word) for word in sent} + several = {word : num[word] for word in num if num[word]>1} + if several == {}: + several = {'Повторяющихся слов' : '0'} + return several +def display(several): + print('Следующее предложение: ') + template = '{:^10} {:^10}' + for keyword in several: + print(template.format(keyword, several[keyword])) +allsent = preproc() +for sentence in allsent: + arr = re.split(r' ', sentence) + several = count(arr) + display(several) +import re +def opencount(): + with open('corp.xml', 'r', encoding = 'utf-8') as f: + text = f.readlines() + c = 0 + for line in text: + line.strip('\s') + if '' not in line: + c += 1 + else: + break + numheader = str(c) 
+ '\n' + return numheader, text +def create_dict(text): + newdict = {} + typearr = [] + allmorphs = [] + for line in text: + if '') + allmorphs.append(morph1[0]) + keys = [] + for el in allmorphs: + if el not in keys: + keys.append(el) + for key in keys: + num = allmorphs.count(key) + newdict[key] = num + return newdict +def writenum(c, newdict, neutrum, csvarr): + with open('result.txt', 'w', encoding = 'utf-8') as f: + f.write(c) + for key, freq in newdict.items(): + string = str(key) + ':' + str(freq) + '\n' + f.write(string) + line = '' + for el in neutrum: + line += el + ', ' + line += '\n' + f.write(line) + for el in csvarr: + f.write(el) + print('Записано.') +def search_pro_n(text): + neutrum = [] + for line in text: + q = re.search('type="f.h', line) + if q != None: + form1 = line.split('">') + form2 = form1[1].split('') + arr1 = arr[1].split('') + arrlines = arr1[0].split('\n') + for line in arrlines: + if '', ', ', line) + line = re.sub('', '\n', line) + csvarr.append(line) + return csvarr +c, text = opencount() +newdict = create_dict(text) +neutrum = search_pro_n(text) +csvarr = wholecorpora() +writenum(c, newdict, neutrum, csvarr) + +file = open ("цитаты1.txt", "r", encoding = "utf-8") +stroki = 0 +for line in file: + arr = line.split('—') + ar = arr[0].split() + if len(arr) > 0: + if len(ar) < 10: + print (arr[0]) + + + +a = float(input ('Введите a:')) +b = float(input ('Введите b:')) +c = float(input ('Введите c:')) +U1 = U4 = False +if a * b == c: + U1 = True + print ('Выполняется условие 1') +if a * c + b == 0: + U4 = True + print ('Выполняется условие 4') +if U1 and U4: + print ('Выполняются условия 1 и 4') +else: + if U1 == False and U4 == False: + { + print ('НЕ выполняется ни одно из условий 1 или 4') + } +print ('Для завешения нажмите ENTER') +ENTER = input('') +import re +def get_text(fn): + a = [] + with open(fn, 'r', encoding = "utf-8") as f: + for line in f: + a.append(line) + return a +def main(): + text = get_text('Санкт-Петербург — 
Википедия.html') + reg = '
]*?>(UTC[+-]?\d{1,2}:?\d{0,2})' + for ti in text: + m = re.search(reg, ti) + if m != None: + print(m.group(1)) + return m.group(1) +def record(): + r = main() + f = open("result.txt","w") + f.write("Часовой пояс - " + r) + f.close() +record() +def open_text(): + + with open('Austen Jane. Pride and Prejudice.txt', "r", encoding = "utf-8") as f: + text = f.read() + text = text.lower() + arr = text.split() + for i, w in enumerate(arr): + arr[i] = arr[i].strip('.,!?-;:“"”''') + return arr + +def isness(word): + + Ret = 0 + if len(word) > 4: + if word[-4:] == 'ness': + Ret = 1 + else: + Ret = 0 + return Ret +def AddInList(word, List, Qn): + + Yes = 0 + for i in range(len(List)): + if (List[i] == word): + Qn[i] +=1 + Yes = 1 + if (Yes == 0): + List.append(word) + Qn.append(1) +Inarr = open_text() +List = list() +Qn = list() +for i in range (len(Inarr)): + if isness(Inarr[i]) == 1: + AddInList(Inarr[i], List, Qn) +print('Количество разных сущ. с суффиксом -ness равно: ' + str(len(List))) +Max = 0 +Ind = 0 +for i in range(len(List)): + if Qn[i] > Max: + Ind = i + Max = Qn[i] +print('Максимальную частотность имеет слово: ' + List[Ind] + ', с частотностью: ' + str(Qn[Ind])) +import os +import re +def papka(): + folder = [f for f in os.listdir('.')if not re.search(r'[0-9]+',f)if os.path.isfile(f)] + print(len(folder)) + return folder +papka() +def dop(): + arr = [] + astr = 0 + exist = 0 + folder = [f for f in os.listdir('.')] + for p in range(len(folder)): + for j in range(len(folder[p])): + if folder[p][j] == '.': + astr = folder[p][0:j] + exist = 0 + for k in range(len(arr)): + if arr[k] == astr: + exist = 1 + if exist == 0: + arr.append(astr) + return arr +print(dop()) +word = input ("Введите слово на кириллице:") +i = 0 +while i < len(word): + if word[i] == 'п' or word[i] == 'о' or word[i] == 'е': + print (word[i]) + i = i+2 +print ("Для завершения работы нажмите ENTER") +ENTER = input ('') +import re +def open_text(): + with open('Викинги — Википедия.html', "r", 
encoding = "utf-8") as f: + text = f.read() + return text +def replacement(): + result1 = re.sub('викинг((и|у|е|а(х|м(и)?)?)|о(в|м)?)?[^\w]', 'бурундук\\1', open_text()) + result2 = re.sub('Викинг((и|у|е|а(х|м(и)?)?)|о(в|м)?)?[^\w]', 'Бурундук\\1', result1) + return result2 +def record(): + r = replacement() + f = open("result.txt","w", encoding = "utf-8") + f.write(r) + f.close() + return f +record() + +import re +import os +import csv +def first(): + reg = '' + for i in os.listdir('.'): + if i.endswith('.xhtml'): + m = [] + with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t: + text = t.read() + for t in re.findall(reg, text): + m.append(t) + with open('new_text.txt', 'a', encoding = 'utf-8') as f: + f.write(i+'\t'+str(len(m)) + '\n') +first() + +def second(): + for i in os.listdir('.'): + reg = '' + with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t: + text = t.read() + for t in re.findall(reg, text): + if re.search('', text): + with open('table.csv', 'a', encoding = 'utf-8') as f: + f.write(i+','+re.search('', text)) +second() +import random + +def read_words(filename): + + file = open(filename, "r", encoding = "utf-8") + arr = [] + for line in file: + arr += line.strip().split(', ') + file.close() + return arr +def verb(number): + + + if number == 's': + return random.choice(read_words("singular_verbs.txt")) + else: + return random.choice(read_words("plural_verbs.txt")) +def noun(number): + + + if number == 's': + return random.choice(read_words("singular_nouns.txt")) + else: + return random.choice(read_words("plural_nouns.txt")) +def clinoun(): + + + return random.choice(read_words("clitic_noun.txt")) +def adverb(): + + + return random.choice(read_words("adverb.txt")) +def punctuation(): + + return random.choice(read_words("punctuation.txt")) +def verse1(): + + return clinoun() + ' ' + noun('s') + ' ' + adverb() + ' ' + verb('s') + punctuation() +def verse2(): + + return noun('pl') + ' ' + verb('pl') + ' ' + adverb() + ' ' + clinoun() + 
punctuation() +def verse3(): + + return noun('s') + ' ' + adverb() + ' ' + clinoun() + ' ' + verb('s') + punctuation() +def make_verse(): + + verse = random.choice([1,2,3]) + if verse == 1: + return verse1() + elif verse == 2: + return verse2() + else: + return verse3() +for n in range(4): + print(make_verse()) +def open_text(): + with open('green.txt', "r", encoding = "utf-8") as f: + text = f.read() + arr = text.split('.') + return arr +def deli(): + txt = open_text() + for i, w in enumerate(txt): + for s in '.,!?-;:“"”''()«»–': + txt[i] = txt[i].replace(s, "") + return txt +def des(): + txt = deli() + dlina = [x for x in txt if len(x.split()) > 10] + return dlina +def big(): + txt = des() + f = [] + for i in txt: + f += [x for x in i.split() if x[0].isupper()] + return f +print (big()) +file = open("text.txt", "r", encoding = "utf-8") +lmin = lmax = len(file.readline()) +for line in file: + lp = len(line) + if lp > 0: + if lmin > lp: + lmin = lp + if lmax < lp: + lmax = lp +print (lmax / lmin) + +def open_text_1(): + + with open('islandcorp.xml', "r", encoding = "utf-8") as f: + Line = 0 + for i in f: + if i != '\n': + Line += 1 + else: + break + return Line +def record(): + with open("result1.txt","w", encoding = "utf-8") as f: + f.write(str(open_text_1())) + return +import re + +def keys(): + + with open('islandcorp.xml', "r", encoding = "utf-8") as f: + text = f.read() + Dic = {} + reg = '.*?' 
+ res = re.findall(reg, text) + + for i in range(len(res)): + if res[i] not in Dic: + Dic[res[i]] = 1 + else: + Dic[res[i]] += 1 + + return Dic +def record1(): + with open("result2.txt","w", encoding = "utf-8") as f: + a = keys() + for key in a: + f.write(key + ',' + str(a[key])+ '\n') +record() +record1() +import os +import re +def main(): + Sum = 0 + for root, dirs, files in os.walk('.'): + for d in dirs: + cir = 0 + for i in range(len(d)): + a = re.search(r'[а-яёЁ А-Я]+',d[i]) + if a == None: + cir = 1 + if cir == 0: + Sum += 1 + return Sum +print(main()) +arr =[] +word = input("Введите слово: ") +while word: + arr.append(word) + word = input ("Введите слово: ") +w = 0 +for w in range (len(arr)): + if len(arr[w]) > 5: + print (arr[w]) +print ("Для завершения работы нажмите ENTER") +ENTER = input ('') +import re +def open_text(): + + with open('txtfind.txt', "r", encoding = "utf-8") as f: + text = f.read() + text = text.lower() + arr = text.split() + for i, w in enumerate(arr): + arr[i] = arr[i].strip('.,!?-;:“"”''') + return arr +def find_in_text(): + + List = list() + regex = '\W?(на(((й((д(у(т(ся)?)?|ёшь(ся)?|ёт(ся|е(сь)?)?|ём(ся)?|и|ите(сь)?|я|енный|ены))|ти(сь)?)))|(ш(ёл(ся)?|л(а|и|о)(сь)?|едш(и|(ий|ая|ее)(ся)?)))))\W?' 
+ words = open_text() + for i in range (len(words)): + m = re.search(regex, words[i]) + if m != None: + List.append(words[i]) + return List +uList = list() +List = find_in_text() +for i in range(len(List)): + Include = 0 + for j in range(len(uList)): + if uList[j] == List[i]: + Include = 1 + if Include == 0: + print(List[i]) + uList.append(List[i]) +word = ('abracadabra') +i=0 +while i <= len(word): + print (word[0:i]) + i = i+1 +import random +def get_words(fn): + + words = {} + with open(fn, 'r') as fd: + for line in fd: + word, collocations = line.split(',', 1) + words[word] = collocations.replace(word, '.'*len(word)).split(',') + return words +def ask_riddle(words_dict): + + words = list(words_dict.keys()) + rnd_word = random.choice(words) + rnd_collocation = random.choice(list(words_dict[rnd_word])) + print(rnd_collocation) + word = input('Пропущенное слово:') + return rnd_word, word == rnd_word +def main(): + + words = get_words('f3.csv') + word, answer = ask_riddle(words) + print('И это правильный ответ!' 
if answer else 'Вы ошиблись, правильный ответ: '+ word) + return word, answer +main() +import re +def main(): + s = '' + f = open("Викинги.html","r",encoding="utf-8") + for line in f: + line = re.sub("в(и|и́)кинг(а(ми?|х)?|о(в|м)|у|е|и)?[^\w]","бурундук\\2",line) + line = re.sub("В(и|и́)кинг(а(ми?|х)?|о(в|м)|у|е|и)?[^\w]","Бурундук\\2",line) + s = s + line + f.close() + return s +def record(): + s = main() + f = open("results.txt","w",encoding='utf-8') + f.write(s) + f.close() +record() +a=int(input('input a number1: ')) +b=int(input('input a number2: ')) +c=int(input('input a number3: ')) +print('\na=',a,'\nb=',b,'\nc=',c) +if a*b==c: + print('\nПроизведение чисел a и b равно числу c') +else: + print('\nПроизведение чисел a и b не равно c') +if a*c+b==0: + print('Число c является решением линейного уравнения a*x+b=0') +else: + print('Число c не является решением линейного уравнения a*x+b=0') +print('\nЧтобы завершить программу, нажмите Enter') +ENTER=input('') + +import random +def phrase(): + f0 = open("plus1.txt","r",encoding="utf-8") + pr1 = f0.read().split() + p1 = random.choice(pr1) + f1 = open("plus2.txt","r",encoding="utf-8") + pr2 = f1.read().split() + p2 = random.choice(pr2) + return p1 + ' ' + p2 +def adjective(): + f2 = open("adj.txt","r",encoding="utf-8") + adj = f2.read().split() + return random.choice(adj) +def verb(): + f3 = open("verb.txt","r",encoding="utf-8") + v = f3.read().split() + return random.choice(v) +def noun(num): + f4 = open("sg.txt","r",encoding="utf-8") + nounsg = f4.read().split() + f5 = open("pl.txt","r",encoding="utf-8") + nounpl = f5.read().split() + f6 = open("ind.txt","r",encoding="utf-8") + nounind = f6.read().split() + if num == 'pl': + return random.choice(nounpl) + if num == 'ind': + return random.choice(nounind) + return random.choice(nounsg) +def punctuation(): + puncts = [".", "?", "!", "...",";"] + return random.choice(puncts) +def verse1(): + return phrase() + ' ' + noun("sg") + ' ' + noun("pl") + punctuation() +def 
verse2(): + return verb() + ', ' + verb() + ' ' + noun("ind") + punctuation() +def verse3(): + return noun("sg") + ' ' + adjective() + ' ' + noun("pl") + punctuation() +def doit(): + verse = random.choice([1,2,3]) + if verse == 1: + return verse1() + elif verse == 2: + return verse2() + else: + return verse3() +for n in range(4): + print(doit()) +import os +import re +def texts(name): + f = open(name, 'r') + text = f.read() + x = re.findall('.+', text) + f.close() + return x +def resutls(s,fname): + f = open(fname,"w",encoding = "utf-8") + f.write(s) + f.close() +def words(): + s = "" + for roots, dirs, files in os.walk('.'): + for file in files: + if file.endswith('.xhtml'): + s = s + file + "\t"+ str(len(texts(os.path.join(roots,file)))) + "\n" + results(s,"result1.txt") +if __name__ == '__words__': + words() +import os +def main(): + num = 0 + for root, dirs, files in os.walk('.'): + for d in dirs: + k = 0 + for i in d: + if i not in "йцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪЭЖДЛОРПАВЫФЯЧСМИТЬБЮ": + k += 1 + if k == 0: + num += 1 + return num +if __name__ == '__main__': + print(main()) +import random +def words(): + f = open("1.csv","r",encoding="utf-8") + a = f.read().split(',') + m = [] + for n in a: + b = n.rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ + m.append(b) + return m +def d(): + m = words() + d = {} + for i in m: + a = i.split() + d[a[0]] = a[1] + return d +def rand(): + m = words() + di = d() + mas = [] + for n in di.keys(): + mas.append(n) + v = random.choice(mas) + return v +def attempt(): + di = d() + v = rand() + j = 0 + for i in di[v]: + j += 1 + print(v,'.'*j) + s = input() + if s == di[v]: + result = "you win" + else: + result = "you lose" + return result +print(attempt()) + +s=input("введите слово: ") +i=0 +for letter in s: + if (i+1)%2!=0 : + if s[i]=='о' or s[i]=='п' or s[i]=='е': + print(s[i]) + i=i+1 +print('\nЧтобы завершить программу, нажмите Enter') +ENTER=input('') +import re +def text(): + a=[] + f = 
open("Санкт-Петербург.html","r",encoding="utf-8") + for line in f: + a.append(line) + return a +def main(): + a=text() + s='' + p1 = int; p2 = int + regex = '"[A-Z][A-Z][A-Z](\+|-)?[0-9][0-9]?:?[0-90-9]?"' + for line in a: + b=line.split() + for i in b: + res = re.search(regex,i) + if res != None: + p1 = i.find('>') + p2 = i.find('<') + s=i[p1+1:p2] + return s +def record(): + s=main() + f = open("result.txt","w") + f.write("Часовой пояс - "+s) + f.close() +record() +def names(): + import os + m = os.listdir('.') + return m +def main(): + m = names() + newm = [] + num = 0 + for i in m: + k = 0 + for j in i: + if j in '1234567890': + k += 1 + if k == 0: + num += 1 + if '.' in i: + i = i[:i.index('.')] + if i not in newm: + newm.append(i) + print('num = {}'.format(num)) + print(newm) +if __name__ == '__main__': + main() + + + +def text(): + f = open("ness.txt","r",encoding="utf-8") + a = f.read().split() + m = [] + for n in a: + b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ + m.append(b) + return m +def ness(m): + mas = [] + s = "" + for i in m: + if i[-4:] == 'ness': + if i not in s: + mas.append(i) + s = s + i + " " + return mas +def numb(): + m = text() + mas = ness(m) + return len(mas) +def main(): + m = text() + b = ness(m) + mas2 = [] + fr = "" + s = "" + for i in m: + if i[-4:] == 'ness': + s = s + i + " " + for n in b: + mas2.append(s.count(n)) + maxi = mas2[0] + for j in mas2: + if j > maxi: + maxi = j + for n in b: + if s.count(n) == maxi: + fr=fr+" "+n + return fr +print("Количество разных слов на -ness =",numb(),"\nСамое(ые) частотное(ые) -",main()) +import re +def lines(): + f = open('vim4.txt','r',encoding='utf-8') + a = f.read() + c = re.split(r'[.?!]',a) + lines = [' '.join([word.strip('.,<>/?""-=_+''""[]{}()*&^%$ + return lines +def main(): + sents = lines() + results = [] + for line in sents: + k = '' + k = ['+' for w in line.split()] + if len(k) > 10: + for w in line.split(): + if w.istitle() == True: + results.append(w) + return 
results +if __name__ == '__main__': + print(main()) +import re +def text(): + f = open("portrait.txt","r",encoding="utf-8") + a = f.read().split() + m = [] + for n in a: + b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ + m.append(b) + return m +def main(): + m = text() + regex = 'на(й|ш(е|ё)?)(т|д|л)(ш|енн?)?(а?я?|(и|о|ы|(е|ё)|ую?)?(т|шь)?(ся)?(м(у|и)?|го|е|й|х)?)?' + s = '' + for i in m: + res = re.search(regex,i) + if res != None: + k = 0 + for j in i: + if j not in regex: + k += 1 + if k == 0: + if i not in s: + s = s + i + ' ' + return s +print(main()) +import re +def opp(): + k = 0 + f = open("it.xml","r",encoding="utf-8") + for line in f: + k += 1 + f.close() + return k +def record1(): + f = open('result1.txt','w',encoding='utf-8') + f.write(str(opp())) + f.close() +record1() +def dic(): + d = {} + regex1 = 'lemma="' + regex2 = 'type="[a-zþ0-9]+"' + f = open("it.xml","r",encoding="utf-8") + for line in f: + if re.search(regex1,line) != None: + res = re.search(regex2,line) + if res != None: + p1 = line.rfind('"') + p2 = line.find('type=') + s = line[p2+6:p1] + if s in d.keys(): + d[s] += 1 + else: + d[s] = 1 + return d +def record2(): + d = dic() + f = open('result1.txt','a',encoding='utf-8') + for i in d.keys(): + f.write('\n'+i) + f.close() +record2() +def plur(): + d = {} + regex1 = 'lemma="' + regex2 = 'type="[a-zþ0-9]+"' + f = open("it.xml","r",encoding="utf-8") + for line in f: + if re.search(regex1,line) != None: + res = re.search(regex2,line) + if res != None: + p1 = line.rfind('"') + p2 = line.find('type=') + s = line[p2+6:p1] + if s[0] == 'l' and s[2] == 'f': + if s in d.keys(): + d[s] += 1 + else: + d[s] = 1 + return d +def record3(): + d = plur() + f = open('result2','w',encoding='utf-8') + for i in d.keys(): + f.write(i+' - '+str(d[i])+'\n') + f.close() +record3() +f=open("new1.txt","r",encoding = "utf-8") +mx=mn=len(f.readline()) +for line in f: + if line != "\n": + if len(line) > mx: + mx = len(line) + if len(line) < mn: + mn = 
len(line) +print(mx/mn) +f.close() + +f=open("text1.txt","r",encoding = "utf-8") +for line in f: + sym=line.split(" ") + if sym[2]=="союз": + print(line) +f.close() + +f=open("text1.txt","r",encoding = "utf-8") +s = input("Введите слово: ") +m = [] +while s!='': + m.append(s) + s=input("Введите слово: ") +for i in m: + for line in f: + sym = line.split(" ") + if i == sym[0]: + print(i,sym[1:]) + else: + print(i+" - в словаре нет такого слова") + break +f.close() + +f=open("text1.txt","r",encoding = "utf-8") +s=0 +for line in f: + sym=line.split(" ") + if sym[4]=="ед" and sym[5]=="жен": + print(sym[0]+",") + s=s+float(sym[-1]) +print(s) +f.close() + +m=[] +s=input('введите слово: ') +while s!='': + m.append(s) + s=input('введите слово: ') +for word in m: + if len(word)>5: + print(word) +print('Чтобы завершить программу, нажмите ENTER') +ENTER=input('') +word=input('введите слово: ') +newword='' +for letter in word: + newword=newword+letter + print(newword) +import re +import os +def countsent(file): + sent = 0 + s = open (file,'r') + lines = s.readlines() + for line in lines: + if re.search('',line): + sent = sent + 1 + return sent +def file_countsent(): + cw = open ('countsent.txt','w',encoding='utf-8') + for root, dirs, files in os.walk('news'): + for f in files: + cw.write(f+'\t'+str(countsent(os.path.join(root, f)))+'\n') +def text_data(txt1): + topic = re.search(r'', txt1).group(1) + author = re.search(r'', txt1).group(1) + data = [author, topic] + return data +def csv(data, name): + with open(name, 'a', encoding='cp1251') as f: + f.write(data[2]+'\t'+data[0]+'\t'+data[1]+'\n') +def supertable(): + data1 = [] + for root, dirs, files in os.walk('news'): + for f in files: + with open(os.path.join(root, f), 'r', encoding='cp1251') as m: + txt = m.read() + data = text_data(txt) + data.append(f) + data1.append(data) + for data in data1: + csv(data, 'supertable.csv') +file_countsent() +supertable() +import re +def openfile(): + file1 = input('Введите путь к файлу: ') 
+ with open(file1, "r", encoding="utf-8") as f: + arr = [] + lines = f.readlines() + for line in lines: + if line.strip() == '': + break + else: arr.append(line) + print('Число строк заголовка', len(arr)) +def dictionary(): + file2 = input('Введите путь к файлу: ') + with open(file2, "r", encoding="utf-8") as f: + dictn = {} + text = f.read() + findtype = re.findall(r'type="\w+">', text) + for i in findtype: + i = i[6::].strip('">') + if i not in dictn: + dictn[i] = 1 + else: + dictn[i] += 1 + file3 = input('Введите путь к файлу, куда будет записана информация из словаря: ') + with open(file3, "r", encoding="utf-8") as f: + for key in dictn: + f.write(str(key, dictn[key])) +openfile() +dictionary() +with open('ugadaika.csv', 'r', encoding = 'utf-8') as f: + words = [] + a = f.read() + words = a.split(',') + dic = {} + for i, word in enumerate(words): + if i%2 == 0: + dic[word] = words[i+1] + print('Я хочу сыграть с тобой в одну игру... Какое слово я загадал? Количество точек равно количеству букв в слове.') + for key in dic: + print(dic[key]) + b = input() + if b == key: + print('Молодчинка!!!') + else: + print ('Ты не очень умный, я загадал не это.') +mylist = [] +with open('proga.txt', 'r', encoding='utf-8') as f: + for line in f.readlines(): + x = len(line) + mylist.append(x) +mini = mylist[0] +maxi = mylist[0] +for i in mylist: + if i <= mini: + mini = i + if i > maxi: + maxi = i +print(maxi/mini) + + +import random +def adj(): + a=[] + with open ('adj.txt','r',encoding='utf-8') as f: + a=f.read() + return random.choice(a.split()) + +def Petya(): + b=[] + with open ('nouns_like_Petya.txt','r',encoding='utf-8') as f: + b=f.read() + return random.choice(b.split()) + +def kustik(): + k=[] + with open ('nouns_like_kustik.txt','r',encoding='utf-8') as f: + k=f.read() + return random.choice(k.split()) +def prep(): + c=[] + with open ('prep.txt','r',encoding='utf-8') as f: + c=f.read() + return random.choice(c.split()) +def adjfem(): + d=[] + with open 
('adjfem.txt','r',encoding='utf-8') as f: + d=f.read() + return random.choice(d.split()) +def nounfem(): + e=[] + with open ('nounfem.txt','r',encoding='utf-8') as f: + e=f.read() + return random.choice(e.split()) +def verb(): + g=[] + with open ('verbpf.txt','r',encoding='utf-8') as f: + g=f.read() + return random.choice(g.split()) +def punct(): + h=[] + with open ('punct.txt','r',encoding='utf-8') as f: + h=f.read() + return random.choice(h.split()) +def verse1(): + return adj() + ' ' + Petya() + ' ' + verb() + ' ' + kustik() + punct() +def verse2(): + return prep() + ' ' + adjfem() + ' ' + nounfem() + punct() +def verse3(): + return adj() + ' ' + kustik() + ' ' + verb() + ' ' + Petya() + punct() +def verse4(): + return Petya() + ' ' + verb() + ' ' + nounfem() + punct() +def make_verse(): + verse = random.choice([1,2,3,4]) + if verse == 1: + return verse1() + elif verse == 2: + return verse2() + elif verse == 3: + return verse3() + else: + return verse4() +for n in range(4): + print(make_verse()) +import os +import re +nonum = [] +num = [] +for f in os.listdir('.'): + if re.search('[1234567890]', f): + num.append(f) + else: + nonum.append(f) +print('Файлов, не содержащих цифр в названии: ', len(nonum)) +print('Введите число a и нажмите Enter') +a=int(input()) +print('Введите число b и нажмите Enter') +b=int(input()) +print('Введите число c и нажмите Enter') +c=int(input()) +if a*b==c: + print(c ,'является произведением', a,' и ', b) +else: + print(c ,' не является произведением', a,' и ', b) +if c*a==(-1)*b: + print(c,'является решением линейного уравнения', a,'x +',b,'= 0') +else: + print(c,'не является решением линейного уравнения', a,'x +',b,'= 0') +b=1 +int (b) +a=(input()) +for i in a: + if (b%2)&((i=='о')or(i=='п')or(i=='е')): + print (i) + b+=1 +import re +def vikings(): + wikifile = input('Время альтернативной истории! 
Введите имя файла со статьей про викингов: ') + with open(wikifile, 'r', encoding = 'utf-8') as f: + wikiarticle = f.read() + return wikiarticle +def change1(wikiarticle): + myarticle1 = re.sub('викинг', 'бурундук', wikiarticle) + return myarticle1 +def change2(myarticle1): + myarticle2 = re.sub('Викинг', 'Бурундук', myarticle1) + return myarticle2 +def chimpunks(myarticle2): + newfile = input('Введите имя файла, куда следует поместить измененную статью: ') + with open(newfile, 'w', encoding = 'utf-8') as f: + f.write(myarticle2) +def go(): + chimpunks(change2(change1(vikings()))) +go() +import re +def findforms(): + find = r"\bна(ш(ёл(ся)?|е(л(ся)?|дш(е(го(ся)?|м(ся|у(ся)?)?|е(ся)?|й(ся)?|ю(ся)?)|ую(ся)?|ая(ся)?|и(й(ся)?|е(ся)?|сь|м(и(ся)?)?|х(ся)?)?))|л(а(сь)?|о(сь)?|и(сь)?))|й(ти(сь)?|д(я(сь)?|у(сь|т(ся)?)?|ё(м(ся)?|шь(ся)?|т(ся|е(сь)?)?|нн(ую|ая|ы(х|е|й|ми?)|о(й|го|о|ю|му?)))|е(шь(ся)?|т(ся|е(сь)?)?|м(ся)?|н(а|о|ы|н((ую|ая|ы(х|е|й|ми?)|о(й|го|о|ю|му?))))?)|и(сь|те(сь)?)?)))\b" + arr = [] + with open("find.txt", "r", encoding="utf-8") as f: + words = f.read() + for word in words.split(): + p = re.search(find, word) + if p != None: + if word not in arr: + arr.append(word) + for item in arr: + print(item) +findforms() +a = [] +s = str(input("Введите слово ")) +while s != (""): + if len(s) > 5: + a.append(s) + s = str(input("Введите слово ")) +print('\n'.join(a)) + + + + + + +def counting(): + with open('isl.txt', 'r', encoding='utf-8') as islen: + islen.read() + str = islen.readline().replace('\n', '') + islenlines = [] + islencount = 0 + for line in islen: + islenlines.append + islencount = 0 + if '' in line: + break + print(islencount) + + + + +counting() +def dictionary(): + lemmas = [] + alsolemmas = [] + str = islen.readline + for i in range(str): + if '(.*?)' + links = re.findall(reg, content) + return links +text = open_html('butterflies.html') +links = find_links(text) +for link in links[:20]: + print(link[1], '-->', link[0]) +d = {"Россия":'Москва', + 
"Польша":'Варшава', + "США":'Вашингтон', + "Болгария":'София', + "Армения":'Ереван', + "Бразилия":'Бразилиа', + "Испания":'Москва'} + + + + + + + + + + + + + + +def delete_doubles(d): + arr = [] + new = {} + for key in d: + if d[key] in arr: + else: + new[key] = key + arr.append(d[key]) + return a +delete_doubles(d) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +import re +def open_html(fname): + with open (fname, 'r', encoding='utf-8') as f: + text = f.read() + return text +def tags(text): + m = re.sub(r'<.*?>', r'', text) + t = re.sub(r'\s+',r' ', m) + s = re.sub(r'Илон Маск', r'Маленький котёнок',t) + return s +print(tags(open_html('musk.html'))) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +import re +rain = r"\b\дожд([ьюи]|е|ей|я(м|ми?)|ях|ём?)?\b" +s = input('Введите какое-нибудь слово: ') +m = re.search(rain, s) +if m != None: + print('Это слово является формой слова "дождь"!') +else: + print('Нетушки!') + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +import codecs +def open_file(file_name): + f = codecs.open(file_name, 'r', 'utf-8-sig') + words = [] + for line in f: + line = line.strip() + words += line.split() + for word in words: + word = word.strip(u'.,!?:;()\'\"1234567890') + word = word.lower() + return words +def bigramms(words): + + bi = create_list(words) + dic = {} + for j in bi: + if j not in dic: + dic[j] = 1 + else: + dic[j] += 1 + answer = '' + answer = [n + '\r\n' for n in dic] + print(answer) + return answer +def create_list(words): + bi = [] + for i in range(len(words)): + if i < (len(words) - 1): + j = i+1 + bi.append(words[i] + words[j]) + return bi +words = open_file('text.txt') +bigramms(words) +import re +with open('news.txt', 'r', encoding = 'utf-8') as f: + text = f.read() +punct = '[.,?!:;"\'—@–...«» +tabs = '[\t\n]' +def preprocessing(text): + text = text.strip().lower() + text = re.sub(punct, '', text) + text = re.sub(tabs, 
' ', text) + words = text.split() + return words +words = preprocessing(text) + +def make_freq(arr): + d = {} + for el in arr: + try: + d[el] += 1 + except KeyError: + d[el] = 1 + return d +word_freq = make_freq(words) +def make_bigrams(arr): + bigrams = [] + for i in range(len(words)): + bigr = arr[i] + ' ' + arr[i + 1] + bigrams.append(bigr) + return bigrams +bigrams = make_bigrams(words) + +bigrams_freq = make_freq(bigrams) +from math import log +def count_pmi(x, y): + bigr = x + ' ' + y + try: + p_x = word_freq[x]/len(words) + except KeyError: + p_x = 0 + try: + p_y = word_freq[y]/len(words) + except KeyError: + p_y = 0 + try: + p_xy = bigrams_freq[bigr]/len(bigrams) + except KeyError: + p_xy = 0 + try: + pmi = log(p_xy/(p_x*p_y)) + except ZeroDivisionError: + pmi = 0 + return pmi +def calculate_pmi(): + pmis ={} + for bigr in bigrams: + x, y = bigr.split() + pmi = count_pmi(x, y) + pmis[bigr] = pmi + return pmis +pmi = calculate_pmi() +i = 0 +for el in sorted(pmi, key = lambda m: -pmi[m]): + if i > 100: + break + print(el, pmi[el]) + i += 1 +import os +corpus_anek = '' +corpus_izvest = '' +corpus_teh = '' +for root, dirs, files in os. 
walk('texts'): + if 'anekdots' in root: + for f in files: + with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1: + text = f1.read() + corpus_anek += text + if 'teh_mol' in root: + for f in files: + with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1: + text = f1.read() + corpus_teh += text + if 'izvest' in root: + for f in files: + with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1: + text = f1.read() + corpus_izvest += text +print(corpus_teh[:100]) +words_anek = preprocessing(corpus_anek) +words_teh = preprocessing(corpus_teh) +words_izvest = preprocessing(corpus_izvest) +words_all = words_anek + words_teh + words_izvest +freq_anek = make_freq(words_anek) +freq_teh = make_freq(words_teh) +freq_izvest = make_freq(words_izvest) +freq_all = make_freq(words_all) +def count_pmi_cats(word, category): + p_word = freq_all[word]/len(words_all) + p_cat = 1/3 + if category == 'anek': + d = freq_anek + w = len(words_anek) + elif category == 'izvest': + d = freq_izvest + w = len(words_izvest) + elif category == 'teh': + d = freq_teh + w = len(words_teh) + p_word_cat = d[word]/w + pmi = log(p_word_cat/(p_word*p_cat)) + return pmi +for w in words: + if i > 100: + break + try: + pmi_anek = count_pmi_cats(w, 'anek') + pmi_izvest = count_pmi_cats(w, 'izvest') + pmi_teh = count_pmi_cats(w, 'teh') + max_pmi = max(pmi_anek, pmi_izvest, pmi_teh) + if max_pmi == pmi_anek: + print(w, 'anek') + elif max_pmi == pmi_izvest: + print(w, 'izvest') + elif max_pmi == pmi_teh: + print(w, 'teh') + except KeyError: + pass + i += 1 + + + + + + + + +import os +import re +from math import log +punct = '[.,!«»?&@"$\[\]\(\):;% +tabs = '[\t\n]' +def preprocessing(text): + text_wo_punct = re.sub(punct, '', text.lower()) + text_wo_punct = re.sub(tabs, ' ',text_wo_punct) + words = text_wo_punct.strip().split() + return words +def count_tf(word, text): + n = text.count(word) + return n / len(text) +def count_df(word, texts): + + + + + i = [True for text in texts if word in 
text] + i = sum(i) + return i +def count_idf(word, texts): + df = count_df(word, texts) + try: + idf = len(texts) / df + except ZeroDivisionError: + return 0 + return idf +def count_tfidf(word, text, texts): + tf = count_tf(word, text) + idf = count_idf(word, texts) + tfidf = log(tf, 10) * log(idf, 10) + return tfidf +def keywords(text, texts): + keywords = {} + dic_tfidf = {} + for word in text: + if word in dic_tfidf: + continue + tfidf = count_tfidf(word, text, texts) + dic_tfidf[word] = tfidf + i = 0 + for el in sorted(dic_tfidf, key = lambda x: dic_tfidf[x]): + if i > 5: + break + i += 1 + keywords[el] = dic_tfidf[el] + return keywords +def main(): + texts = {} + for root, dirs, files in os.walk('wikipedia'): + for f in files: + with open(os.path.join(root, f),'r', encoding = 'utf-8') as t: + content = t.read() + text = preprocessing(content) + texts[f] = text + raw_texts = list(texts.values()) + for t in texts: + print('\nИзвлекаем ключевые слова для текста {}'.format(t)) + kwords = keywords(texts[t], raw_texts) + for key in kwords: + print(key, kwords[key]) +if __name__ == '__main__': + main() +print ("Здравствуйте!"\ + ) +a = int(input("Введите число a: ")) +b = int(input("Введите число b: ")) +c = int(input("Введите число c: ")) +if a + b == c: + print ("Числа a и b в сумме дают число c") +else: + print ("Числа a и b в сумме НЕ дают число c") +if c == -b / a: + print ("Число c является решением линейного уравнения ax + b = 0") +else: + print ("Число c НЕ является решением линейного уравнения ax + b = 0") +import re +import os +def folders(): + counter = 0 + numbers = '[0-9]' + titles = os.listdir('.') + for i in titles: + if os.path.isdir(i) and re.search (numbers, i): + counter += 1 + return str(counter) +def names(): + print('Все файлы и(или) папки в текущей папке: ') + arr = [] + res = '\..+' + for i in os.listdir('.'): + name = i + if os.path.isdir(i): + name = re.sub(res, '', i) + if name not in arr: + arr.append(name) + for each in arr: + if each: + 
print(each + '\n') + else: + print('None') +print('Количество папок с цифрами в названии в текущей папке: ' + folders()) +names() +def open_read(): + num = 0 + with open('F.xml', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + num += 1 + return num +def write_doc(num): + numlines = str(num) + with open ('Number.txt', 'w', encoding = 'utf-8') as new_doc: + new_doc.write(numlines) + print('Количество строк: ' + numlines + '\n' + 'Создан документ Number.txt') +def main(): + write_doc(open_read()) +main() +import os +a = {} +def dict_new(): + for root, dirs, files in os.walk('.\\news'): + for file in files: + with open (os.path.join(root, file), 'r', encoding = 'cp1251') as page: + raw_text = page.read() + a[file] = raw_text.count('(.*?)', text, flags = re.DOTALL) + cap = cap.group(3) + return cap +def write_doc(cap): + with open ('Capital.txt', 'w', encoding = 'utf-8') as new_doc: + new_doc.write(cap) + print('Столица данной страны: ' + cap + '\n' + 'Создан документ Capital.txt') +def main(): + write_doc(capital(open_read())) +main() +word = input() +text = [] +while word: + text.append(word) + word = input() +for i in range(len(text)): + new = text[i] + new = new[::-1] + new = list(new) + for t in range (len(new)): + if (t + 1) % 3 == 0: + new[t] = '' + wrd = ''.join(new) + print(wrd) +with open('Master and Margarita.txt','r', encoding = 'utf-8') as MM: + tablewords = [] + space = 0 + lines = MM.readlines() + print(' ', *lines) + for i in range(len(lines)): + for k in range(len(lines[i])): + if lines[i][k] == ' ': + space += 1 + tablewords.append(space + 1) + space = 0 + number = 0 + for l in range(len(tablewords)): + number += tablewords[l] + averword = number/len(lines) + print('\n','Среднее количество слов в строке =',averword) +word = input('Введите слово: ') +print(word) +for i in range(len(word)): + print(word[:-(1+i)]) +import random +with open('allwords.txt', 'r', encoding = 'utf-8') as aw: + lines = aw.readlines() + + def 
noun_m1(): + noun_m1 = [] + noun_m1 = lines[1].split(' ') + return random.choice(noun_m1) + def noun_f1(): + noun_f1 = [] + noun_f1 = lines[2].split(' ') + return random.choice(noun_f1) + def noun_m2(): + noun_m2 = [] + noun_m2 = lines[3].split(' ') + return random.choice(noun_m2) + def noun_f2(): + noun_f2 = [] + noun_f2 = lines[4].split(' ') + return random.choice(noun_f2) + def noun_mid2(): + noun_mid2 = [] + noun_mid2 = lines[5].split(' ') + return random.choice(noun_mid2) + def noun_m3(): + noun_m3 = [] + noun_m3 = lines[6].split(' ') + return random.choice(noun_m3) + def noun_f3(): + noun_f3 = [] + noun_f3 = lines[7].split(' ') + return random.choice(noun_f3) + def noun_mid3(): + noun_mid3 = [] + noun_mid3 = lines[8].split(' ') + return random.choice(noun_mid3) + def noun_m4(): + noun_m4 = [] + noun_m4 = lines[9].split(' ') + return random.choice(noun_m4) + def noun_f4(): + noun_f4 = [] + noun_f4 = lines[10].split(' ') + return random.choice(noun_f4) + def noun_mid4(): + noun_mid4 = [] + noun_mid4 = lines[11].split(' ') + return random.choice(noun_mid4) + def noun_m5(): + noun_m5 = [] + noun_m5 = lines[12].split(' ') + return random.choice(noun_m5) + def noun_f5(): + noun_f5 = [] + noun_f5 = lines[13].split(' ') + return random.choice(noun_f5) + def noun_mid5(): + noun_mid5 = [] + noun_mid5 = lines[14].split(' ') + return random.choice(noun_mid5) + def noun_m6(): + noun_m6 = [] + noun_m6 = lines[15].split(' ') + return random.choice(noun_m6) + def noun_f6(): + noun_f6 = [] + noun_f6 = lines[16].split(' ') + return random.choice(noun_f6) + def noun_mid6(): + noun_mid6 = [] + noun_mid6 = lines[17].split(' ') + return random.choice(noun_mid6) + + def verb_1(): + verb_1 = [] + verb_1 = lines[20].split(' ') + return random.choice(verb_1) + def verb_2(): + verb_2 = [] + verb_2 = lines[21].split(' ') + return random.choice(verb_2) + def verb_3(): + verb_3 = [] + verb_3 = lines[22].split(' ') + return random.choice(verb_3) + def verb_4(): + verb_4 = [] + verb_4 = 
lines[23].split(' ') + return random.choice(verb_4) + def verb_5(): + verb_5 = [] + verb_5 = lines[24].split(' ') + return random.choice(verb_5) + def verb_6(): + verb_6 = [] + verb_6 = lines[25].split(' ') + return random.choice(verb_6) + + def conj_1(): + conj_1 = [] + conj_1 = lines[28].split(' ') + return random.choice(conj_1) + def conj_2(): + conj_2 = [] + conj_2 = lines[29].split(' ') + return random.choice(conj_2) + + def adj_m1(): + adj_m1 = [] + adj_m1 = lines[32].split(' ') + return random.choice(adj_m1) + def adj_m2(): + adj_m2 = [] + adj_m2 = lines[33].split(' ') + return random.choice(adj_m2) + def adj_f2(): + adj_f2 = [] + adj_f2 = lines[34].split(' ') + return random.choice(adj_f2) + def adj_m3(): + adj_m3 = [] + adj_m3 = lines[35].split(' ') + return random.choice(adj_m3) + def adj_f3(): + adj_f3 = [] + adj_f3 = lines[36].split(' ') + return random.choice(adj_f3) + def adj_mid3(): + adj_mid3 = [] + adj_mid3 = lines[37].split(' ') + return random.choice(adj_mid3) + def adj_m4(): + adj_m4 = [] + adj_m4 = lines[38].split(' ') + return random.choice(adj_m4) + def adj_f4(): + adj_f4 = [] + adj_f4 = lines[39].split(' ') + return random.choice(adj_f4) + def adj_mid4(): + adj_mid4 = [] + adj_mid4 = lines[40].split(' ') + return random.choice(adj_mid4) + def adj_m5(): + adj_m5 = [] + adj_m5 = lines[41].split(' ') + return random.choice(adj_m5) + def adj_f5(): + adj_f5 = [] + adj_f5 = lines[42].split(' ') + return random.choice(adj_f5) + def adj_mid5(): + adj_mid5 = [] + adj_mid5 = lines[43].split(' ') + return random.choice(adj_mid5) + def adj_m6(): + adj_m6 = [] + adj_m6 = lines[44].split(' ') + return random.choice(adj_m6) + def adj_f6(): + adj_f6 = [] + adj_f6 = lines[45].split(' ') + return random.choice(adj_f6) + def adj_mid6(): + adj_mid6 = [] + adj_mid6 = lines[46].split(' ') + return random.choice(adj_mid6) + + def adv_2(): + adv_2 = [] + adv_2 = lines[49].split(' ') + return random.choice(adv_2) + def adv_3(): + adv_3 = [] + adv_3 = 
lines[50].split(' ') + return random.choice(adv_3) + def adv_4(): + adv_4 = [] + adv_4 = lines[51].split(' ') + return random.choice(adv_4) + def adv_5(): + adv_5 = [] + adv_5 = lines[52].split(' ') + return random.choice(adv_5) + def adv_6(): + adv_6 = [] + adv_6 = lines[53].split(' ') + return random.choice(adv_6) + + def numeral_m2(): + numeral_m2 = [] + numeral_m2 = lines[56].split(' ') + return random.choice(numeral_m2) + def numeral_f2(): + numeral_f2 = [] + numeral_f2 = lines[57].split(' ') + return random.choice(numeral_f2) + def numeral_mid2(): + numeral_mid2 = [] + numeral_mid2 = lines[58].split(' ') + return random.choice(numeral_mid2) + def numeral_m3(): + numeral_m3 = [] + numeral_m3 = lines[59].split(' ') + return random.choice(numeral_m3) + def numeral_f3(): + numeral_f3 = [] + numeral_f3 = lines[60].split(' ') + return random.choice(numeral_f3) + def numeral_mid3(): + numeral_mid3 = [] + numeral_mid3 = lines[61].split(' ') + return random.choice(numeral_mid3) + def numeral_m4(): + numeral_m4 = [] + numeral_m4 = lines[62].split(' ') + return random.choice(numeral_m4) + def numeral_f4(): + numeral_f4 = [] + numeral_f4 = lines[63].split(' ') + return random.choice(numeral_f4) + def numeral_mid4(): + numeral_mid4 = [] + numeral_mid4 = lines[64].split(' ') + return random.choice(numeral_mid4) + def numeral_m5(): + numeral_m5 = [] + numeral_m5 = lines[65].split(' ') + return random.choice(numeral_m5) + def numeral_f5(): + numeral_f5 = [] + numeral_f5 = lines[66].split(' ') + return random.choice(numeral_f5) + def numeral_mid2(): + numeral_mid5 = [] + numeral_mid5 = lines[67].split(' ') + return random.choice(numeral_mid5) + def numeral_f6(): + numeral_f6 = [] + numeral_f6 = lines[68].split(' ') + return random.choice(numeral_f6) + def numeral_mid6(): + numeral_mid6 = [] + numeral_mid6 = lines[69].split(' ') + return random.choice(numeral_mid6) + def row_1_5(): + phrase_of_5_1 =[adj_m1() + ' ' + noun_m4(), adj_m2() + ' ' + noun_m3(), adj_m3() + ' ' + 
noun_m2(), adj_m4() + ' ' + noun_m1(), + numeral_m2() + ' ' + noun_m1() + ' ' + verb_2(), numeral_m2() + ' ' + noun_m2() + ' ' + verb_1(), + numeral_m2() + ' ' + noun_m3(), numeral_m3() + ' ' + noun_m1() + ' ' + verb_1(), numeral_m3() + ' ' + noun_m2(), + adj_f2() + ' ' + noun_f3(), adj_f3() + ' ' + noun_f2(), adj_f4() + ' ' + noun_f1(), + numeral_f2() + ' ' + noun_f1() + ' ' + verb_2(), numeral_f2() + ' ' + noun_f2() + ' ' + verb_2(), numeral_f2() + ' ' + noun_f3(), + numeral_f3() + ' ' + noun_f1() + ' ' + verb_1(), numeral_f3() + ' ' + noun_f2(), + numeral_mid2() + ' ' + verb_2(), numeral_mid2() + ' ' + noun_mid2() + ' ' + verb_1(), + numeral_mid2() + ' ' + noun_mid3(), + numeral_mid3() + ' ' + verb_1(), numeral_mid3() + ' ' + noun_mid2(),noun_m5(), noun_f5(), noun_mid5()] + return random.choice(phrase_of_5_1) + def row_1_7(): + phrase_of_7_1 =[adv_2() + ' ' + verb_5(), adv_3() + ' ' + verb_4(), adv_4() + ' ' + verb_3(), adv_5() + ' ' + verb_2(), adv_6() + ' ' + verb_1(), + adv_2() + ' ' + verb_4() + ' ' + conj_1(), adv_2() + ' ' + verb_3() + ' ' + conj_2(), + adv_3() + ' ' + verb_3() + ' ' + conj_1(), adv_3() + ' ' + verb_2() + ' ' + conj_2(), + adv_4() + ' ' + verb_2() + ' ' + conj_1(), adv_4() + ' ' + verb_1() + ' ' + conj_2(), + adv_5() + ' ' + verb_1() + ' ' + conj_1(), adv_5() + ' ' + conj_2(), + adv_6() + ' ' + conj_1()] + return random.choice(phrase_of_7_1) + def row_2_5(): + phrase_of_5_2 =[verb_1() + ' ' + noun_m4(), verb_2() + ' ' + noun_m3(), verb_3() + ' ' + noun_m2(), verb_4() + ' ' + noun_m1(), + verb_1() + ' ' + noun_f4(), verb_2() + ' ' + noun_f3(), verb_3() + ' ' + noun_f2(), verb_4() + ' ' + noun_f1(), + verb_1() + ' ' + noun_mid4(), verb_2() + ' ' + noun_mid3(), verb_3() + ' ' + noun_mid2()] + return random.choice(phrase_of_5_2) + def row_2_7(): + phrase_of_7_2 =[noun_m1() + ' ' + verb_6(),noun_m2() + ' ' + verb_5(),noun_m3() + ' ' + verb_4(),noun_m4() + ' ' +verb_3(), + noun_m5() + ' ' + verb_2(), + noun_m6() + ' ' + verb_1(), + noun_f1() + ' 
' + verb_6(), noun_f2() + ' ' + verb_5(), noun_f3() + ' ' + verb_4(), noun_f4() + ' ' + verb_3(), + noun_f5() + ' ' + verb_2(), + noun_f6() + ' ' + verb_1(), noun_mid2() + ' ' + verb_5(), noun_mid3() + ' ' + verb_4(), noun_mid4() + ' ' + verb_3(), + noun_mid5() + ' ' + verb_2(), + noun_mid6() + ' ' + verb_1()] + return random.choice(phrase_of_7_2) + def row_3_5(): + phrase_of_5_3 =[verb_5(), adv_5()] + return random.choice(phrase_of_5_3) + def haiku(): + ready = [row_2_5() + '\n' + row_2_7() + '\n' + row_1_5(), + row_3_5() + '\n' + row_2_7() + '\n' + row_3_5(), + row_1_5() + '\n' + row_1_7() + '\n' + row_3_5()] + return random.choice(ready) +print(haiku()) +word = input ('give a word') +lenghth = len(word) +z = 0 +newword ='space' +while newword != '': + newword = '' + newword = word[z:lenghth] + print(newword) + z += 1 + lenghth -= 1 + +import re +def sentences(): + with open ('text.txt','r',encoding = 'utf-8') as f: + text = f.read() + m = re.findall('[^.!?]{1,}?[.?!]', text) + m= [sent.split() for sent in m] + for sentence in m: + for i in range(len(sentence)): + sentence[i] = sentence[i].strip('!?.,;:"').lower() + return m +def output(m): + maxi = max([len(word) for sentence in m for word in sentence]) + sentence_number = 0 + for sentence in m: + sentence_number += 1 + print ('предложение №', sentence_number) + words = [] + for word in sentence: + if word not in words: + words.append(word) + j = 0 + for i in range(0, len(sentence) - 1): + if word == sentence[i]: + j += 1 + if j > 1: + print('{:^{maxi}} {:^2}'.format(word,j, maxi = maxi)) +output(sentences()) + +import csv +import random +def open_file(): + with open('some.csv', 'r') as f: + a =[] + reader = csv.reader(f) + for line in reader: + a.append(line) + return a +def dictionary(a): + d = {} + for i in range(0,5): + d[a[0][i]] = a[1][i] + return d +def answer(d,a): + word = random.choice(list(d.values())) + for key in d: + if d[key] == word: + print('твоя подсказка:',key) + while True: + ans = 
input('введи слово') + if ans == word: + return random.choice(a[2]) + else: + print(random.choice(a[3])) +print('мы загадали слово для тебя') +print(answer(dictionary(open_file()),open_file())) + +import re +def open_text(): + words = [] + with open('text.txt', 'r', encoding ='utf-8') as f: + text = f.read().lower() + text = text.split() + for item in text: + item = item.strip('.,?!-') + if item not in words: + words.append(item) + return words +def answer(words): + for item in words: + m = re.match( r'\bси(д(и(шь|те?|м)?|е(л(о|а|и)?|в(ш(и(й|ми?|е|х)?|е(го|му?|е|й|ю)|ая|ую))?|ть)|я(т|щ(и(й|ми?|е|х)|е(го|му?|е|й|ю)|ая|ую))?)|жу)\b', item) + if m != None: + print(item) +sit = answer(open_text()) + +quantity = 0 +percent = 0 +f = open('newy.txt','r',encoding ='utf-8') +for line in f: + quantity += 1 + a = line.split() + if len(a) > 5: + percent += 1 + else: + continue + a = [] +f.close() +if percent == 0 or quantity == 0: + print(' no lines like this') +else: + print ('the number of lines:', percent / quantity * 100) +import os +def walking(): + d = {root : len(files) for root, dirs, files in os.walk('.')} + maxi = max(d.values()) + for key in d: + if d[key] == maxi: + print ('there are',maxi,'files in',key) +walking() +import re +def open(): + with open('ptitsi.html','r', encoding = 'utf-8') as f: + content = f.read() + return content +def substitute(content): + content = re.sub('<.*?>','', content, flags = re.DOTALL) + content = re.sub(r'(\n| ){2,}','' ,content, flags = re.DOTALL) + content = re.sub('птиц(а(ми?|х)|ы|е(й|ю)?|у)?','рыб\\1', content) + content = re.sub('Птиц(а(ми?|х)|ы|е(й|ю)?|у)?','Рыб\\1', content) + return content +def write(content): + with open('text.txt','w', encoding = 'utf-8') as f: + f.write(content) +print(write(substitute(open())) + + + +import random +def imperative(): + with open('imperatives.txt', 'r',encoding = 'utf-8') as f: + imperatives =[] + for line in f: + newword = line.strip() + imperatives.append(newword) + return 
random.choice(imperatives) +def noun_acc(): + with open('nouns_Acc_Sg&Pl.txt', 'r',encoding = 'utf-8') as f: + noun_accs =[] + for line in f: + newword = line.strip() + noun_accs.append(newword) + return random.choice(noun_accs) +def ins_phrase(): + with open('clitics_Ins.txt', 'r',encoding = 'utf-8') as f: + clitics = [] + for line in f: + newword = line.strip() + clitics.append(newword) + with open('nouns_Ins.txt', 'r',encoding = 'utf-8') as g: + noun_inss = [] + for line in g: + newword = line.strip() + noun_inss.append(newword) + return random.choice(clitics) + ' ' + random.choice(noun_inss) +def noun_pl(): + with open('nouns_ Nom=Acc_Pl.txt', 'r',encoding = 'utf-8') as f: + noun_pls = [] + for line in f: + newword = line.strip() + noun_pls.append(newword) + return random.choice(noun_pls) +def noun_sg(): + with open('nouns_Nom=Acc_Sg.txt', 'r',encoding = 'utf-8') as f: + noun_sgs = [] + for line in f: + newword = line.strip() + noun_sgs.append(newword) + return random.choice(noun_sgs) +def verb(): + with open('verbs_Pl.txt', 'r',encoding = 'utf-8') as f: + verbs = [] + for line in f: + newword = line.strip() + verbs.append(newword) + return random.choice(verbs) +def adverb(): + with open('adverbs.txt', 'r',encoding = 'utf-8') as f: + adverbs = [] + for line in f: + newword = line.strip() + adverbs.append(newword) + return random.choice(adverbs) +def punctuation(): + marks = ['.', '!', '...'] + return random.choice(marks) +def type1(): + return imperative() + ' ' + noun_acc() + punctuation() +def type2(): + return noun_pl() + ' ' + verb() + punctuation() +def type3(): + return imperative() + ' ' + ins_phrase() + punctuation() +def type4(): + return noun_pl() + ' ' + verb() + ' ' + noun_pl() + punctuation() +def type5(): + return noun_pl() + ' ' + verb() + ' ' + noun_sg() + punctuation() +def type6(): + return ins_phrase() + ' ' + imperative() + ' ' + noun_sg() + punctuation() +def type7(): + return imperative() + ' ' + noun_acc() + ' ' + adverb() + punctuation() 
+def tanka(i): + line ='' + if (i == 1) or (i == 3): + line = random.choice([1,2,3]) + if line == 1: + line = type1() + if line == 2: + line = type2() + if line == 3: + line = type3() + else: + line = random.choice([4,5,6,7]) + if line == 4: + line = type4() + if line == 5: + line = type5() + if line == 6: + line = type6() + if line == 7: + line = type7() + return line +def printing(): + for i in range(1,6): + print(tanka(i)) +a = printing() + +def open_text(text): + with open(text, 'r', encoding ='utf-8') as f: + text = f.read().lower() + words = text.split() + return words +def percent(words, number): + i,j = 0,0 + for item in words: + if item[0:2] =='un': + i+=1 + if len(item) > number: + j +=1 + if i != 0: + print('the number of words:', i) + return str(round(j / i * 100)) + '%' + else: + return 'no matching words were found' +def questions(): + text = input(' Please, enter the name of the text') + number = int(input(' Please, enter the lenght')) + words = open_text(text) + answer = percent(words, number) + return answer +print('your result is', questions()) +n = int(input( )) +w = 0 +i = 0 +while w <= n: + w = 2**i + i += 1 + if w % 2 == 0 and w <= n: + print (w) +import re +def open_text(): + with open('archi.html','r', encoding = 'utf-8') as f: + text = f.read() + return text +def search(text): + m = re.search(r'title="Коды языков".*?title="ISO (\d\d\d)"',text, flags = re.DOTALL) + return m.group(1) +def write(z): + with open('archi.txt','w', encoding = 'utf-8') as f: + f.write(z) +archi = write(search(open_text())) + +import os +import re +def search(): + count = 0 + a =[] + for f in os.listdir(): + if os.path.isdir(f) and f not in a: + + + lat = re.search('.*[a-zA-z].*', str(f)) + rus = re.search('.*[а-яА-ЯЁё].*', str(f)) + if lat != None and rus != None: + count+=1 + a.append(f) + if count == 1: + print('1 dir was found', end = '') + else: + print (count, 'dirs were found ', end ='') + if a != [] : + print( ':'+', '.join(a)) +search() + +count = 0 +arr = 
['','','',''] +while count < 4: + s = input('vvedi slovo') + arr [ int(count)] += s + s = '' + count += 0.5 +for i in range (0,4): + print(arr[i]) +a = int(input('введи а')) +b = int(input('введи b')) +c = int(input('введи с')) +if a / b == c: + print('а разделить на b равно с') +else: + print('а разделить на b не равно с') +if a ** b == c: + print(' а в степени b равно c') +else: + print(' а в степени b не равно с') + +with open ('hw5.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines () + list_1 = [] + for line in lines: + line = line.split() + n = len (line) + list_1.append (n) + sum_list = 0 + sum_line = 0 + for elem in list_1: + if elem > 5: + sum_list += 1 + sum_line += 1 + else: + sum_list += 1 + percent = (sum_line / sum_list) * 100 + print (percent, '% строк содержит больше 5 слов') + + + +import re +def opentext(text): + with open(text, 'r', encoding = 'utf-8') as f: + sentences = f.read() + text = re.sub('\.(\.\.)?|\?', '!', sentences) + list_ = text.split('!') + return list_ +def text_format(text): + text = opentext(text) + text1 = [re.sub('( - )|( — )|( ‒ )', ' ', i) for i in text] + sents = [sent.split() for sent in text1] + sents2 = [[i.strip('.,?!":; + sents3 = [[i.lower() for i in sent] for sent in sents2] + return sents3 +def search(text): + sentences = text_format(text) + repeated = [[w for w in sent if sent.count(w) > 1] for sent in sentences] + return repeated +def count(text): + a = search(text) + b = opentext(text) + for i in range(len(a)): + if a[i]: + print (str(b[i]) + '\n') + c = {w : a[i].count(w) for w in a[i]} + keys = c.keys() + for key in keys: + print ('{:^10}'.format(key) + '{:^10}'.format(c[key])) +text = input('Введите название файла: ') +count(text) + + + + + + + +import re +def opentext(text): + with open(text, 'r', encoding = 'utf-8') as f: + text = f.readlines() + list_ = [] + for line in text: + line = line.split() + list_.extend(line) + words = [] + for i in range(len(list_)): + a = list_[i] + a = 
a.strip('.,?!"":;*()%$ + words.append(a) + return words +def find_form(): + form = 'си((жу)|д((и((шь)|м|(те?))?)|(е((ть)|(л(а|и|о)?)|(в(ш((и(й|е|х|(ми?))?)|(е((го)|(му?)|й|е)?)|(ая)|(ую))))))|(я(щ((и(й|(ми?)|х|е))|(е((го)|(му?)|й|е))|(ая)|(ую)))?)))' + form2 = 'буд((ут?)|(е(м|(шь)|(те?))))' + words = opentext(text) + forms = [] + for i in range(len(words)): + m = re.search(form, words[i]) + if m != None: + if words[i] == 'сидеть' and re.search(form2, words[i-1]) != None: + form_fut = words[i-1] + ' ' + words[i] + if form_fut not in forms: + forms.append(form_fut) + else: + continue + else: + if words[i] not in forms: + forms.append(words[i]) + else: + continue + else: + continue + return forms +text = input('Введите название файла: ') +m = find_form() +print ('Формы глагола "сидеть", встретившиеся в тексте:') +for i in range(len(m)): + print (m[i], end = '\n') +l = [] +for i in range(8): + l.append (input()) +print (l[0]+l[1]) +print (l[2]+l[3]) +print (l[4]+l[5]) +print (l[6]+l[7]) + +import os +import re +def list_files(path): + files_list = [] + for d, dirs, files in os.walk(path): + for f in files: + path_f = os.path.join(d, f) + files_list.append(path_f) + return files_list +def open_file(f): + with open(f, 'r', encoding = 'utf-8') as k: + text = k.readlines() + return text +def count_sent(path): + files = list_files(path) + list_sent = {} + for f in files: + b = re.search('(_.*?.xhtml)', f) + f_name = b.group(1) + sent = 0 + file_text = open_file(f) + for line in file_text: + if re.search('', line) != None: + sent = sent + 1 + list_sent[f_name] = sent + return list_sent +def file_format_sent(path): + sent = count_sent(path) + with open('task1.txt', 'w', encoding = 'utf-8')as k: + for key in sent.keys(): + k.write(key + '\t' + str(sent[key]) + '\n') + +def inf(f): + text = open_file(f) + inf = {} + for line in text: + author = re.search('content="(.*?)" name="author"', line) + if author != None: + author1 = author.group(1) + for line in text: + topic = 
re.search('content="(.*?)" name="topic"', line) + if topic != None: + topic1 = topic.group(1) + inf[author1] = topic1 + return inf +def create_csv(path): + files = list_files(path) + with open('task2.csv', 'w', encoding = 'utf-8') as k: + for f in files: + infa = inf(f) + f_name = re.search('(_.*?.xhtml)', f).group(1) + for key in infa.keys(): + k.write(str(f_name) + '\t' + str(key) + '\t' + str(infa[key]) + '\n') + +def pr_loc(f): + text = open_file(f) + bigrams = [] + for i in range(len(text)): + pr = re.search('gr="PR"', text[i]) + if pr != None: + prep = re.search('(.*?)', text[i]).group(1) + loc = re.search('"S.*?loc', text[i+1]) + if loc != None: + S_loc = re.search('(.*?)', text[i+1]).group(1) + bigrams.append(prep + ' ' + S_loc) + return bigrams +def text_without_tegs(f): + text = open_file(f) + text_w_t = '' + for line in text: + if re.search('', line) != None: + word = re.search('(.*?)', line).group(1) + prep = re.search('(.)()?', line) + if prep != None: + if prep.group(1) == '.' or prep.group(1) == '!' 
or prep.group(1) == '?': + text_w_t = text_w_t + ' ' + word + prep.group(1)+'\n' + else: + text_w_t = text_w_t + ' ' + word + prep.group(1) + else: + text_w_t = text_w_t + ' ' + word + return text_w_t + + + + + + + + + + + +def bigr(path): + files = list_files(path) + with open('task3.txt', 'w', encoding = 'utf-8') as k: + for f in files: + for b in pr_loc(f): + k.write(b + '\n') +path = 'C:\\Users\\1\\Documents\\ниу вшэ\\КИЛИ и программирование\\python\\экзамен\\news' +file_format_sent(path) +create_csv(path) +bigr(path) + + + + + + +import random +def adjective_Abl_m(): + with open('adjective_Abl_verse1_m.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + return random.choice(line) +def adjective_Abl_f(): + with open('adjective_Abl_verse1_f.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + return random.choice(line) +def noun_Abl_m(): + with open('noun_Abl_verse1_m.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + return random.choice(line) +def noun_Abl_f(): + with open('noun_Abl_verse1_f.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + return random.choice(line) +def noun_phrase(): + with open('prepositions.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + prep = random.choice(line) + while prep != 'в' and prep != 'к' and prep != 'с': + prep = random.choice(line) + if prep == 'в' or prep == 'к': + with open('noun_verse1_prep1.txt', 'r', encoding = 'utf-8') as k: + nouns = k.readlines() + for noun in nouns: + noun = noun.split() + noun1 = random.choice(noun) + else: + with open('noun_verse1_prep2.txt', 'r', encoding = 'utf-8') as k: + nouns = k.readlines() + for noun in nouns: + noun = noun.split() + noun1 = random.choice(noun) + return prep.title() + ' ' + noun1 +def noun_Gen(): + with 
open('noun_Gen_verse1.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + return random.choice(line) +def verse11(): + return adjective_Abl_m().title() + ' ' + noun_Abl_m() +def verse12(): + return adjective_Abl_f().title() + ' ' + noun_Abl_f() +def verse13(): + return noun_phrase() + ' ' + noun_Gen() +def participle_adj(): + with open('participle_adjective_verse2.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + return random.choice(line) +def subject(): + with open('subject_verse2.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + return random.choice(line) +def place(): + with open('places_verse2.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split(', ') + return random.choice(line) +def obj_f(): + with open('adjective_obj_verse2_f.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + adj = random.choice(line) + with open('object_verse2_f.txt', 'r', encoding = 'utf-8') as k: + objects = k.readlines() + for obj in objects: + obj = obj.split() + obj = random.choice(obj) + return adj + ' ' + obj +def obj_m(): + with open('object_verse2_m.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.split() + obj1 = random.choice(line) + with open('object_Gen_verse2_m.txt', 'r', encoding = 'utf-8') as k: + objects = k.readlines() + for obj in objects: + obj = obj.split() + obj2 = random.choice(obj) + with open('adjective_obj_verse2_m.txt', 'r', encoding = 'utf-8') as l: + adjectives = l.readlines() + for adjective in adjectives: + adjective = adjective.split() + adj = random.choice(adjective) + return adj + ' ' + obj2 + ' ' + obj1 +def verse21(): + return participle_adj().title() + ' ' + subject() + ' ' + place() + '.' 
# Prepositions that the_end_of_the_line() never uses.
_VERSE42_DROPPED_PREPS = ('под', 'у', 'от', 'по', 'из')

# Preposition -> word list used by the_end_of_the_line() when the choice of
# the final word does not depend on the noun of the line.
_VERSE42_NOUN_FILES = {
    'во': 'noun_verse42_1.txt',
    'ко': 'noun_verse42_2.txt',
    'в': 'noun_verse42_4.txt',
    'с': 'noun_verse42_5.txt',
    'к': 'noun_verse42_6.txt',
}

# Nouns after which the final word 'ней' is not allowed (for every other noun
# 'нем' is the forbidden one).  Presumably these are the masculine entries of
# object_verse42.txt - TODO confirm against the word list.
_VERSE42_NO_NEY_NOUNS = ('плач', 'крик', 'стон', 'зов', 'стан', 'взгляд',
                         'прах', 'плен', 'хлад')

# Preposition -> word list for the second noun of verse41(); any other
# preposition falls back to noun_verse41_6.txt.
_VERSE41_NOUN_FILES = {
    'под': 'noun_verse41_2.txt',
    'над': 'noun_verse41_2.txt',
    'у': 'noun_verse41_3.txt',
    'от': 'noun_verse41_3.txt',
    'из': 'noun_verse41_3.txt',
    'при': 'noun_verse41_4.txt',
    'на': 'noun_verse41_5.txt',
}

# First nouns of verse41() that take their verb from verb_verse41_1.txt.
_VERSE41_FIRST_VERB_LIST_NOUNS = ('дрожь', 'ночь', 'сталь', 'тень', 'кровь',
                                  'плеть')


def _last_line_words(path):
    """Return the whitespace-separated words on the last line of *path*.

    This mirrors the loops it replaces: they split every line of the file but
    only the tokens of the final line were ever used.  (The word lists are
    presumably single-line files - TODO confirm.)  An empty file gives [].
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    return lines[-1].split() if lines else []


def _pick(path):
    """Return one random word from the word list in *path*.

    Raises IndexError (from random.choice) when the list is empty.
    """
    return random.choice(_last_line_words(path))


def verse22():
    """Return a capitalised verb from verb_verse2.txt followed by obj_f()."""
    verb = _pick('verb_verse2.txt')
    return verb.title() + ' ' + obj_f()


def verse23():
    """Return a capitalised verb from verb_verse2.txt followed by obj_m()."""
    verb = _pick('verb_verse2.txt')
    return verb.title() + ' ' + obj_m()


def verb_feel():
    """Return a random word from verb_feelings.txt."""
    return _pick('verb_feelings.txt')


def verse31():
    """Return '<Feeling verb>, <preposition> <noun>'.

    The noun is a stem from base_noun_verse3.txt plus an ending selected by
    the preposition; the stems 'мор' and 'солнц' take their own endings.
    The prepositions 'с', 'в' and 'к' are printed as 'как с' / 'как в' /
    'как к'.
    """
    prep = _pick('prepositions.txt')
    stem = _pick('base_noun_verse3.txt')
    if prep in ('под', 'над', 'с'):
        ending = 'ем' if stem in ('мор', 'солнц') else 'ом'
    elif prep in ('у', 'от', 'из'):
        ending = 'я' if stem == 'мор' else 'а'
    elif prep in ('при', 'на', 'в'):
        ending = 'е'
    else:
        # 'к' and every preposition not listed above.
        ending = 'ю' if stem == 'мор' else 'у'
    if prep in ('с', 'в', 'к'):
        prep = 'как ' + prep
    # The feeling verb is drawn last, as in the original expression order.
    return verb_feel().title() + ',' + ' ' + prep + ' ' + stem + ending


def verse32():
    """Return '<Participle> <subject>.' built from the verse-3 word lists."""
    participle = _pick('participle_verse3.txt')
    subject = _pick('subject_verse3.txt')
    return participle.title() + ' ' + subject + '.'


def verse41():
    """Return '<Noun> <preposition> <noun> <verb>.'.

    'в', 'к' and 'с' are never used here.  They are filtered out up front
    instead of being re-drawn in a loop, so a preposition list made only of
    those three raises IndexError instead of looping forever.
    """
    noun1 = _pick('noun_verse41_1.txt')
    allowed = [p for p in _last_line_words('prepositions.txt')
               if p not in ('в', 'к', 'с')]
    prep = random.choice(allowed)
    noun2 = _pick(_VERSE41_NOUN_FILES.get(prep, 'noun_verse41_6.txt'))
    if noun1 in _VERSE41_FIRST_VERB_LIST_NOUNS:
        verb1 = _pick('verb_verse41_1.txt')
    else:
        verb1 = _pick('verb_verse41_2.txt')
    return noun1.title() + ' ' + prep + ' ' + noun2 + ' ' + verb1 + '.'


def noun42():
    """Return a random word from object_verse42.txt."""
    return _pick('object_verse42.txt')


def the_end_of_the_line(noun=None):
    """Return the '<Preposition> <word>' tail used by verse42().

    noun -- the noun the tail has to agree with.  When omitted, a noun is
            drawn with noun42() as the original code did; verse42() passes
            the noun it actually prints so that both halves of the line agree.

    Fixes over the previous version:
    * 'во', 'со' and 'ко' are all added to the preposition pool
      ("'во' and 'со' and 'ко'" evaluates to just 'ко');
    * the 'со' branch now produces 'мной' instead of leaving the result
      variable unassigned;
    * the noun is tested against the whole noun list ("x == ('a' or 'b')"
      only compares with 'a');
    * prepositions missing from the file no longer raise ValueError, and an
      exhausted word list raises IndexError instead of looping forever.
    """
    preps = [p for p in _last_line_words('prepositions.txt')
             if p not in _VERSE42_DROPPED_PREPS]
    preps.extend(('во', 'со', 'ко'))
    prep = random.choice(preps)

    if prep == 'со':
        ending = 'мной'
    elif prep in _VERSE42_NOUN_FILES:
        ending = _pick(_VERSE42_NOUN_FILES[prep])
    else:
        # 'при' / 'на' share one list, every remaining preposition another.
        if prep in ('при', 'на'):
            path = 'noun_verse42_3.txt'
        else:
            path = 'noun_verse42_7.txt'
        if noun is None:
            noun = noun42()
        banned = 'ней' if noun in _VERSE42_NO_NEY_NOUNS else 'нем'
        ending = random.choice(
            [word for word in _last_line_words(path) if word != banned])
    return prep.title() + ' ' + ending


def verse42():
    """Return '<Feeling verb> <noun> <pronoun>... <Preposition> <word>'."""
    pronoun = _pick('pronoun_verse4.txt')
    feeling = verb_feel().title()
    noun = noun42()
    # Hand the printed noun over so the tail agrees with it.
    return (feeling + ' ' + noun + ' ' + pronoun + '... '
            + the_end_of_the_line(noun))


def verse51():
    """Return '<Pronoun> <adjective> <noun>.'; the pronoun selects the lists."""
    pronoun = _pick('pronoun_verse5.txt')
    if pronoun in ('вся', 'та'):
        adj_file, noun_file = 'adjective_verse5_f_4.txt', 'noun_verse5_f.txt'
    elif pronoun in ('весь', 'тот'):
        adj_file, noun_file = 'adjective_verse5_m_3.txt', 'noun_verse5_m.txt'
    else:
        adj_file, noun_file = 'adjective_verse5_f_3.txt', 'noun_verse5_f.txt'
    adj = _pick(adj_file)
    noun1 = _pick(noun_file)
    return pronoun.title() + ' ' + adj + ' ' + noun1 + '.'


def verse52():
    """Return '<Parenthesis> <noun> <verb>?!'.

    'звезда', 'вуаль' and 'туман' take a verb from verb_verse52_sg.txt, every
    other noun from verb_verse52_pl.txt.
    """
    parenthesis = _pick('parenthesis_verse5.txt')
    noun1 = _pick('noun_verse52.txt')
    if noun1 in ('звезда', 'вуаль', 'туман'):
        verb1 = _pick('verb_verse52_sg.txt')
    else:
        verb1 = _pick('verb_verse52_pl.txt')
    return parenthesis.title() + ' ' + noun1 + ' ' + verb1 + '?!'
def poem():
    """Assemble a five-line poem from randomly chosen line builders.

    One of six layouts is drawn first; layouts 1, 2 and 5 then draw one of
    two sub-variants and layouts 3 and 6 one of four.  The line builders are
    called top to bottom, so the sequence of random draws is unchanged.
    """
    variant = random.choice([1, 2, 3, 4, 5, 6])
    if variant == 1:
        opening = verse11 if random.choice([1, 2]) == 1 else verse12
        builders = [opening, verse21, verse31, verse41, verse52]
    elif variant == 2:
        second = verse22 if random.choice([1, 2]) == 1 else verse23
        builders = [verse13, second, verse32, verse42, verse51]
    elif variant in (3, 6):
        var = random.choice([1, 2, 3, 4])
        opening = verse11 if var in (1, 3) else verse12
        second = verse22 if var in (1, 2) else verse23
        closing = [verse41, verse52] if variant == 3 else [verse42, verse51]
        builders = [opening, second, verse32] + closing
    elif variant == 4:
        builders = [verse13, verse21, verse31, verse41, verse52]
    else:
        # variant == 5
        second = verse22 if random.choice([1, 2]) == 1 else verse23
        builders = [verse13, second, verse32, verse41, verse52]
    return '\n'.join(build() for build in builders)


print (poem())


# --- Script: replace the forms of one word in 'Птицы.html' -----------------
import re


def open_file():
    """Return the full text of 'Птицы.html'."""
    with open('Птицы.html', 'r', encoding = 'utf-8') as f:
        text = f.read()
    return text


def sub_word():
    """Return the page text with the matched word forms replaced.

    Lower-case and capitalised forms are handled by two separate patterns;
    the matched ending (group 1) is appended to the new stem.
    """
    word1 = '\\bпти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b'
    word2 = '\\bПти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b'
    s = re.sub(word1, 'рыб\\1', open_file())
    m = re.sub(word2, 'Рыб\\1', s)
    return m


def add_file():
    """Write the result of sub_word() to 'Замена.html'.

    Returns the file object, which is already closed at that point.
    """
    with open('Замена.html', 'w', encoding = 'utf-8') as k:
        k.write(sub_word())
    return k


add_file()


# --- Script: calendar date check -------------------------------------------
def data (year, month, day):
    """Return True when day/month/year is a valid Gregorian calendar date.

    Fixes over the previous version:
    * day 31 is accepted in the 31-day months (the old 'day >= 31' test
      rejected it everywhere);
    * months and days below 1 are rejected;
    * century years are leap years only when divisible by 400 (was 1000);
    * 16.12.1998 still prints the greeting, but the function now returns
      True for it instead of None.
    """
    if not 1 <= month <= 12 or day < 1:
        return False
    leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    days_in_month = [31, 29 if leap else 28, 31, 30, 31, 30,
                     31, 31, 30, 31, 30, 31]
    if day > days_in_month[month - 1]:
        return False
    if day == 16 and month == 12 and year == 1998:
        print ("Вы угадали день рождения разработчика! Не забудьте его поздравить :)")
    return True


year = input ("Введите год (натуральное число): ")
month = input ("Введите месяц (натуральное число до 12 включительно): ")
day = input ("Введите день (натуральное число до 31 включительно): ")
# An empty answer to any of the three prompts ends the loop.
while year and month and day:
    # data() is called once per date so the greeting is printed only once.
    if data (int(year), int(month), int(day)):
        print ("Такая дата есть в календаре:)")
    else:
        print ("Простите, но такой даты нету:(")
    print ("Попробуем снова:)")
    year = input ("Введите год (натуральное число): ")
    month = input ("Введите месяц (натуральное число до 12 включительно): ")
    day = input ("Введите день (натуральное число до 31 включительно): ")
print ("Все!:)")


# --- Script: sum of the three inputs, each halved and rounded up -----------
a = int (input ())
b = int (input ())
c = int (input ())
s = (a + 1) // 2 + (b + 1) // 2 + (c + 1) // 2
print (s)


# --- Scratch notes on os / shutil -------------------------------------------
# NOTE(review): these statements create, rename, move and delete real files
# and directories relative to the current working directory.
print (os.path.abspath('.'))
print (os.getcwd())
os.path.join('texts', '1.txt')

os.path.exists('texts')
print (os.listdir('.'))

s = 'hello'
i = 1
texts = [f for f in os.listdir('.') if f.endswith('.txt')]
print (texts)
# Append 'hello' repeated i times to every .txt file.
# NOTE(review): the original indentation of 'i += 1' is lost; it is assumed
# here to run once per .txt file.
for f in os.listdir('.'):
    if f.endswith('.txt'):
        with open(f, 'a', encoding = 'utf-8') as w:
            w.write (s*i)
        i += 1
os.mkdir('corpus1')
os.makedirs('a\\b\\long\\long')
os.rename('texts\\1.txt', 'texts\\2.txt')
os.path.isfile(r'texts\corpus1.txt')
os.path.isdir(r'texts')
shutil.copy(r'texts\2.txt', r'new_corpus\2.txt')
shutil.move('откуда', 'куда')
shutil.copytree('папка', 'папка2')
os.remove(r'new_corpus\2.txt')
shutil.rmtree('corpus')


# --- Script: right-aligned output -------------------------------------------
def align_right(arr):
    """Print every item of *arr* right-aligned in a 40-character field."""
    for i in arr:
        print ('{:>40}'.format(i))


arr = ['abba', 'assa', 'adda', 'affa']
align_right(arr)


# --- Script: simple tokenizer -----------------------------------------------
def tokenize(text):
    """Split *text* on whitespace, strip punctuation from both ends of each
    token and lower-case it.  Tokens made only of punctuation become ''."""
    tokens = text.split()
    tokens1 = [t.strip('.,?!":;*()-— ') for t in tokens]
    tokens2 = [t.lower() for t in tokens1]
    return tokens2


text = 'Инициатива публикации лучших дисциплин исходила в том числе от Студсовета. Чуть ранее представители Студенческого совета получили возможность использовать результаты СОП при обсуждении возникающих проблем и спорных моментов. Теперь все студенты смогут использовать опубликованную информацию — агрегированное мнение своих предшественников — при формировании собственной индивидуальной образовательной траектории.'
+print(tokenize(text)) + + + +def tabulate(a): + for i in range(0, len(a)): + print('{:<10}'.format(a[i][0]) + '{:^10}'.format(a[i][1]) + '{:>10}'.format(a[i][2])) +a = [('кошки','собаки','коровы'), ('мяу','гав','му'), (3,3,2)] +tabulate(a) +x = int (input ()) +if x > 0: + sign = 1 +elif x < 0: + sign = -1 +else: + sign = 0 +print (sign) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +a = int (input ()) +b = int (input ()) +if a < b: + print (a) +else: + print (b) +x = int (input ('введите целое число x = ')) +print ('вы ввели число', x) +res = x*55/100+33 +print ('результат вычислений x * 55 / 100 + 33 =', res) +a = int (input ('введите длину первого катета a = ')) +b = int (input ('введите длину второго катета b = ')) +S = a * b / 2 +print (S) + + +import re +def func1(regw, word1): + word = input('Введите слово: ') + m = re.search(regw, word) + if m != None: + return 'Данное слово является формой слова ' + word1 + else: + return 'Данное слово не является формой слова ' + word1 +word1 = 'свобода' +regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b' + + +def if_any(s, regw): + m = re.search(regw, s) + s = s.split() + p = [] + for i in range(len(s)): + m = re.search(regw, s[i]) + if m != None: + p = p.append(s[i]) + else: + continue + return 'Слово встречается в тексте ' + len(p) + ' раз' +s = 'Свободу попугаям!' +print(if_any(s, regw)) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +import re + + + + + + + + +import os +import shutil +import re + +def make_folders_sent(s): + sent = s.split() + b = '\\'.join(sent) + os.makedirs(b) +s = input('Пожалуйста, введите предложение (без знаков препинания!) 
\n') +make_folders_sent(s) + +def make_folders_num(n): + for i in range(1,n+1): + os.mkdir(str(i)) + for a in range(i): + name = str(i) + '\\' + str(a+1) + '.txt' + file = open(name, 'w', encoding = 'utf-8') + file.write('Hello!') +n = int(input('Пожалуйста, введите натуральное число \n')) +make_folders_num(n) + +def count(): + filelist = [f for f in os.listdir('.') if os.path.isfile(f)] + exts = [] + for f in filelist: + ext = f.split('.')[-1] + exts.append(ext) + c = {e : exts.count(e) for e in exts} + keys = c.keys() + for key in keys: + print('{:^10}'.format(key) + '{:^10}'.format(c[key])) +count() +name = input ('Введите ваше имя: ') +age = input ('Сколько вам лет? ') +colour = input ('Какой ваш любимый цвет? ') +music = input ('Кто ваш любимый музыкальный исполнитель? ') +dream = input ('Какова ваша заветная мечта? ') +with open ('information.txt', 'w', encoding = 'utf-8') as f: + f.write ('Информация о соседе\n') + f.write (name + '\n' + age + '\n' + colour + '\n' + music + '\n' + dream) +with open('Austen_Jane_Pride_and_Prejudice.txt', 'r', encoding = 'utf-8') as f: + text = f.readlines() + list_ = [] + for line in text: + line = line.split() + list_.extend(line) +print (list_) + + + + + + + + + + + + + + + + + + + + + + + + + + + + +import re + + + + + + + + + +with open ('freq.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines () + for line in lines: + if 'союз' in line: + print (line) + + +with open ('freq.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines () + a = [] + for line in lines: + line = line.split () + if 'жен' in line and 'ед' in line: + print (line[0], end = ', ') + a.append (line[-1]) + ipm_sum = 0 + for elem in a: + elem = float (elem) + ipm_sum += elem + print (ipm_sum) + + +with open ('freq.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines () + word = input () + while word: + for line in lines: + line = line.split() + if word in line: + print ('Морфологическая информация: ' + ' '.join (line[2:-2])) + print ('IPM 
= ' + line[-1]) + word = input () +import random + + +with open ('words.txt', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + random.shuffle(lines) + score = 0 + for line in lines: + line = line.strip () + word, hint = line.split(' ', 1) + response = input ('Какое слово я загадала?\n ' + 'Подсказка: ' + hint + ' ') + if response == word: + print ('Правильно, молодец!') + score += 1 + else: + print ('А вот и нет, слово было ', word) +with open ('scores.txt', 'w', encoding = 'utf-8') as n: + percent = score / 5 * 100 + n.write('Вот результат\n') + n.write(str(percent) + '%') + + +import re +def func1(regw, word1): + word = input('Введите слово: ') + m = re.search(regw, word) + if m != None: + return 'Данное слово является формой слова ' + word1 + else: + return 'Данное слово не является формой слова ' + word1 +word1 = 'свобода' +regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b' + + +def if_any(s, regw): + m = re.search(regw, s) + s = s.split() + p = [] + for i in range(len(s)): + m = re.search(regw, s[i]) + if m != None: + p = p.append(s[i]) + else: + continue + return 'Слово встречается в тексте ' + len(p) + ' раз' +s = 'Свободу попугаям!' +print(if_any(s, regw)) + + + + + +import re +def opentext(a): + with open(a, 'r', encoding = 'utf-8') as f: + text = f.read() + return text +def delete_tags(): + s = re.sub ('<.*?>', '', opentext(name), flags = re.DOTALL) + return s +def delete_odd(): + s = re.sub ('(\\s)+', '\\1', delete_tags()) + return s + + + + +name = input('Введите название файла: ') + +print (delete_odd()) +n = int (input ()) +hour = n // 60 +if hour >= 24: + k = hour // 24 + hour = hour - k * 24 +minute = n % 60 +print (hour, minute) + + + +import re +def opentext(a): + with open (a, 'r', encoding = 'utf-8') as f: + content = f.read() + return content +def find_all_links(): + reg = r'(.*?)' + links = re.findall(reg, opentext(a)) + return links +a = input('Введите название файла: ') + + + + +def pictures(): + reg = r'
(.*?)
' + pictures = re.findall(reg, opentext(a)) + return pictures +pictures = pictures() +print ('Подписи к картинкам: ') +for picture in pictures: + print (picture[2]) + + + + + + + +def opentext(fname): + with open(fname, 'r', encoding = 'utf-8') as f: + text = f.readlines() + for line in text: + line = line.split() + list_ = [] + for i in range (0, len(line)): + a = line[i] + a = a.lower() + a = a.strip('.,?!";:"*()') + list_.append(a) + return list_ + + + + +def first_letter(letter): + fname = input('введите название файла: ') + text = opentext(fname) + words_letter = [] + for i in range(len(text)): + if text[i].startswith(letter) == True: + words_letter.append(text[i]) + else: + continue + return words_letter + + + + +def questions(): + letter = input('введите первую букву: ') + number = int(input('введите число: ')) + words = first_letter(letter) + result = [] + for i in range(len(words)): + if len(words[i]) > number: + result.append(words[i]) + else: + continue + return result +print (questions()) +a = int (input ('введите первое число ')) +b = int (input ('введите второе число ')) +c = int (input ('введите третье число ')) +s = a + b + c +print (s) + + + + + + + + + + + + + + + +import os +def drawtree(): + for root, dirs, files in os.walk('C:\\Users\\1\\Documents\\ниу вшэ'): + num = root.count('\\') + new_root = root.split('\\')[-1] + print('\t'*num+'--'+new_root+'\n') + for f in files: + print((num+1)*'\t'+f) +drawtree() +name = input () +print ('Hello, ' + name + '!') + + + + + + + + + + + + + + + + + + + + +import re +def opentext(a): + with open(a, 'r', encoding = 'utf-8') as f: + text = f.read() + return text +def delete_tags(): + s = re.sub ('<.*?>', '', opentext(name), flags = re.DOTALL) + return s +def delete_odd(): + s = re.sub ('(\\s)+', '\\1', delete_tags()) + return s + + + + +name = input('Введите название файла: ') + +print (delete_odd()) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +import re +def opentext(a): 
+ with open (a, 'r', encoding = 'utf-8') as f: + content = f.read() + return content +def find_all_links(): + reg = r'(.*?)' + links = re.findall(reg, opentext(a)) + return links +a = input('Введите название файла: ') + + + + +def pictures(): + reg = r'
(.*?)
' + pictures = re.findall(reg, opentext(a)) + return pictures +pictures = pictures() +print ('Подписи к картинкам: ') +for picture in pictures: + print (picture[2]) +num = int (input ()) +t = 1 +while t * 2 <= num: + t = t * 2 + print (t) + + + +def opentext(fname): + with open(fname, 'r', encoding = 'utf-8') as f: + text = f.readlines() + list_ = [] + for line in text: + line = line.split() + list_.extend(line) + words = [] + for i in range(len(list_)): + a = list_[i] + a = a.lower() + a = a.strip('.,?!";:"*()') + words.append(a) + return words +def un_forms(): + text = opentext(fname) + words_un = [] + for i in range(len(text)): + if text[i].startswith('un') == True: + words_un.append(text[i]) + else: + continue + return words_un +def quantity(): + words = un_forms() + return len(words) +def percentage(number): + words = un_forms() + s = 0 + for i in range(len(words)): + if len(words[i]) > number: + s += 1 + else: + continue + result = s / len(words) * 100 + return result +fname = input('Введите название файла: ') +number = int(input('Введите число: ')) +print ('Количество слов с приставкой un- равно ', quantity()) +print ('Процент слов с приставкой un- длинее ', number, ' равен ', percentage(number)) + + + +import os +import shutil +import re +def all_files(): + ff = os.listdir('.') + file_names = [] + for f in ff: + if os.path.isfile(f): + a = f.split('.') + if a[-1].isdigit() or re.search(r'\s', a[-1]) != None: + a = '.'.join(a) + elif len(a) > 2: + a[0] = '.'.join(a[:-1]) + name = a[0] + file_names.append(name) + return file_names +def all_dirs(): + ff = os.listdir('.') + dir_names = [] + for f in ff: + if os.path.isdir(f): + name = f + dir_names.append(name) + return dir_names +def all_without_rep(): + names_file = all_files() + names_dir = all_dirs() + names = names_file + names_dir + names_1 = [] + for name in names: + if name not in names_1: + names_1.append(name) + return names_1 +def out_nice(): + names = all_without_rep() + print('Список папок и файлов 
в текущей директории: ') + for name in names: + print (name) +def cyrill_latin_symb_fold(): + names = all_dirs() + lat = '[a-zA-Z]' + cyr = '[а-яА-Я]' + cyr_lat_dirs = [name for name in names if re.search(lat, name) != None and re.search(cyr, name) != None] + return len(cyr_lat_dirs) +out_nice() +print ('Количество папок, содержащих и латинские, и кириллические символы, равно: ', cyrill_latin_symb_fold()) + + + + + + +import random +def file(): + with open('dictionary.csv', 'r', encoding = 'utf-8') as f: + lines = f.readlines() + d = {} + for line in lines: + line = line.split(';') + d[line[0]] = line[1].strip('\n') + return d +def right(): + with open('Верные ответы.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.split('\n') + return random.choice(text) +def wrong(): + with open('Неверные ответы.txt', 'r', encoding = 'utf-8') as f: + text = f.read() + text = text.split('\n') + return random.choice(text) +def zagadka(d): + keys = d.keys() + keys = list(keys) + key = random.choice(keys) + print ('Подсказка: ' + key + '...') + answer = input('Введите ответ: ') + if answer == d[key]: + return(right()) + else: + return(wrong() + ' Верный ответ ' + d[key] + '.') +d = file() +a = input('Хочешь поиграть? 
Введи "да" или "нет":)\n') +while a == 'да': + print(zagadka(d)) + a = input('Хочешь сыграть еще раз?:) Введи "да" или "нет"\n') +print ('До свидания!') +a = int (input ()) +b = int (input ()) +c = int (input ()) +if a / b == c: + print (a, "разделить на", b, "равно", c) +else: + print (a, "разделить на", b, "не равно", c) +if a ** b == c: + print (a, "в степени", b, "равно", c) +else: + print (a, "в степени", b, "не равно", c) + + +import os +def max_dir(): + a = {} + for root, dirs, files in os.walk(os.path.abspath('.')): + a[root] = len(files) + max_v = max(a.values()) + if max_v == 1: + print('Наибольшее количество файлов (' + str(max_v) +' файл) в директориях: ') + elif max_v == 2 or max_v == 3 or max_v == 4: + print('Наибольшее количество файлов (' + str(max_v) +' файла) в директориях: ') + else: + print('Наибольшее количество файлов (' + str(max_v) +' файлов) в директориях: ') + for key in a.keys(): + if a[key] == max_v: + print(key) +max_dir() + + + + +import re +def open_file(a): + with open(a, 'r', encoding = 'utf-8') as f: + text = f.read() + return text +def find_ISO(): + reg = 'ISO 639-3(\\w{3})' + m = re.search(reg, open_file(a), flags = re.DOTALL) + if m: + ISO = m.group(2) + return ISO + else: + return 'В статье не указано ISO 639-3' +def add_file(): + with open('Result.txt', 'a', encoding = 'utf-8') as k: + k.write('\n') + k.write(a) + k.write(': ') + k.write(find_ISO()) + return k +a = input('Введите название статьи в формате Название.html: ') +add_file() +word = input ('введите слово: ') +for i in range (1,len(word)): + print (word[i:-i]) +with open('dict.csv', 'r', encoding='utf-8') as f: + lines = f.readlines() + a = dict() + for line in lines: + line = line.strip('\n') + key, value = line.split(':', 1) + a[key] = value + for key in a: + b = input('Угадай слово. 
Вот подсказка: '+a[key]) + if b == key: + print('Правильно!') + else: + t = 0 + while b != key and t <= (len(key)-1): + b = input('Неправильно, попробуй еще раз: ') + t += 1 + else: + print('Правильно!') +import os + + + + +def greatestway(): + depth = [] + for root, dirs, files in os.walk('.', topdown=False): + a = str(root).count('/') + if a not in depth: + depth.append(a) + return max(depth) +print(greatestway()) +import re +import os + + + +def openfile(): + for root, dirs, files in os.walk('.\\news2'): + for f in files: + with open(os.path.join(root, f), 'r', encoding='Windows-1251') as text: + file_text = text.read() + file_text = re.sub('<.*?>', '', file_text) + file_text2 = file_text.split('.') + count = len(file_text2) + print(f, ' ', count) + return +def meta(): + for root, dirs, files in os.walk('.\\news2'): + for f in files: + with open(os.path.join(root, f), 'r', encoding='Windows-1251') as text: + file_text = text.read() + writer = re.match('', file_text).group(1) + topic = re.match('', file_text).group(1) + with open('.\\table.csv', 'w', encoding='utf-8') as csv_f: + heading_string = 'Файл' + ' ' + 'Автор' + ' ' + 'Тема' + csv_f.write(heading_string) + with open('.\\table.csv', 'a', encoding='utf-8') as csv_writer: + string = f + ' ' + writer + ' ' + topic + csv_writer.write(string) + return +print(openfile()) +print(meta()) +import re +def openf(): + with open('F.xml', 'r', encoding='utf=8') as f: + lines = f.readlines() + return lines +def countli(): + lines = openf() + linecount = 0 + for line in lines: + linecount += 1 + results = 'result.txt' + with open(results, 'w', encoding='utf-8') as n: + n.write(str(linecount)) + return results +def dicfreq(): + lines = openf() + types = [] + for line in lines: + l = str(line) + if 'lemma' in l: + reg = re.search(r'', l) + types.append(reg.group(2)) + freq = {} + for i in range(len(types)): + if types[i] not in freq: + freq[types[i]] = 1 + else: + freq[types[i]] += 1 + with open('keys.txt', 'w', 
encoding='utf-8') as te: + te.write('\n'.join(freq.keys())) + return freq +print(countli(), dicfreq()) +import re +fname = input('Введите название файла: ') +def openfile(fname): + with open(fname,'r', encoding='utf-8') as f: + text = f.read() + return text +def sentences(): + text = openfile(fname) + text = text.strip() + se = re.split('\\b[.!?\\n]+(?=\\s)', text) + return se +def find8(): + se = sentences() + greater7 = [] + for i in se: + words = i.split(' ') + words = [str(w).strip('?!&(),.:;«»\n”“ ') for w in words] + greater = [] + greater += [w for w in words if len(w) > 7] + template = '{} {:->10}' + for g in greater: + print(template.format(g,len(g))) + return +print(find8()) + +import re +import os +import shutil +filename = [] +unique = [] +name = '' +def numberinf(): + number = 0 + for f in os.listdir('REALEC'): + name = str(f) + b = re.sub(r'\.\D+', '', name) + c = re.search(r'\d', b) + if c != None: + number += 1 + return number +def foldername(): + for f in os.listdir('REALEC'): + name = str(f) + b = re.sub(r'\.\D+', '', name) + filename.append(b) + for n in filename: + if n != '' and n not in unique: + unique.append(n) + return unique +print(numberinf(), foldername()) +import random +with open('words.txt', 'r', encoding='utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.strip() + def nm1(): + nm1 = list() + nm1 = lines[0] + nm1 = nm1.strip() + nm1 = nm1.split(' ') + nm1.remove(nm1[0]) + return random.choice(nm1) + def nf1(): + nf1 = list() + nf1 = lines[1] + nf1 = nf1.strip() + nf1 = nf1.split(' ') + nf1.remove(nf1[0]) + return random.choice(nf1) + def nm2(): + nm2 = list() + nm2 = lines[2] + nm2 = nm2.strip() + nm2 = nm2.split(' ') + nm2.remove(nm2[0]) + return random.choice(nm2) + def nf2(): + nf2 = list() + nf2 = lines[3] + nf2 = nf2.strip() + nf2 = nf2.split(' ') + nf2.remove(nf2[0]) + return random.choice(nf2) + def nm3(): + nm3 = list() + nm3 = lines[4] + nm3 = nm3.strip() + nm3 = nm3.split(' ') + nm3.remove(nm3[0]) + 
return random.choice(nm3) + def nf3(): + nf3 = list() + nf3 = lines[5] + nf3 = nf3.strip() + nf3 = nf3.split(' ') + nf3.remove(nf3[0]) + return random.choice(nf3) + def nm4(): + nm4 = list() + nm4 = lines[6] + nm4 = nm4.strip() + nm4 = nm4.split(' ') + nm4.remove(nm4[0]) + return random.choice(nm4) + def nf4(): + nf4 = list() + nf4 = lines[7] + nf4 = nf4.strip() + nf4 = nf4.split(' ') + nf4.remove(nf4[0]) + return random.choice(nf4) + def nm5(): + nm5 = list() + nm5 = lines[8] + nm5 = nm5.strip() + nm5 = nm5.split(' ') + nm5.remove(nm5[0]) + return random.choice(nm5) + def nf5(): + nf5 = list() + nf5 = lines[9] + nf5 = nf5.strip() + nf5 = nf5.split(' ') + nf5.remove(nf5[0]) + return random.choice(nf5) + def nm6(): + nm6 = list() + nm6 = lines[10] + nm6 = nm6.strip() + nm6 = nm6.split(' ') + nm6.remove(nm6[0]) + return random.choice(nm6) + def nf6(): + nf6 = list() + nf6 = lines[11] + nf6 = nf6.strip() + nf6 = nf6.split(' ') + nf6.remove(nf6[0]) + return random.choice(nf6) + def adjm1(): + adjm1 = list() + adjm1 = lines[12] + adjm1 = adjm1.strip() + adjm1 = adjm1.split(' ') + adjm1.remove(adjm1[0]) + return random.choice(adjm1) + def adjm2(): + adjm2 = list() + adjm2 = lines[13] + adjm2 = adjm2.strip() + adjm2 = adjm2.split(' ') + adjm2.remove(adjm2[0]) + return random.choice(adjm2) + def adjf2(): + adjf2 = list() + adjf2 = lines[14] + adjf2 = adjf2.strip() + adjf2 = adjf2.split(' ') + adjf2.remove(adjf2[0]) + return random.choice(adjf2) + def adjm3(): + adjm3 = list() + adjm3 = lines[15] + adjm3 = adjm3.strip() + adjm3 = adjm3.split(' ') + adjm3.remove(adjm3[0]) + return random.choice(adjm3) + def adjf3(): + adjf3 = list() + adjf3 = lines[16] + adjf3 = adjf3.strip() + adjf3 = adjf3.split(' ') + adjf3.remove(adjf3[0]) + return random.choice(adjf3) + def adjm4(): + adjm4 = list() + adjm4 = lines[17] + adjm4 = adjm4.strip() + adjm4 = adjm4.split(' ') + adjm4.remove(adjm4[0]) + return random.choice(adjm4) + def adjf4(): + adjf4 = list() + adjf4 = lines[18] + adjf4 = 
adjf4.strip() + adjf4 = adjf4.split(' ') + adjf4.remove(adjf4[0]) + return random.choice(adjf4) + def adjm5(): + adjm5 = list() + adjm5 = lines[19] + adjm5 = adjm5.strip() + adjm5 = adjm5.split(' ') + adjm5.remove(adjm5[0]) + return random.choice(adjm5) + def adjf5(): + adjf5 = list() + adjf5 = lines[20] + adjf5 = adjf5.strip() + adjf5 = adjf5.split(' ') + adjf5.remove(adjf5[0]) + return random.choice(adjf5) + def adjm6(): + adjm6 = list() + adjm6 = lines[21] + adjm6 = adjm6.strip() + adjm6 = adjm6.split(' ') + adjm6.remove(adjm6[0]) + return random.choice(adjm6) + def adjf6(): + adjf6 = list() + adjf6 = lines[22] + adjf6 = adjf6.strip() + adjf6 = adjf6.split(' ') + adjf6.remove(adjf6[0]) + return random.choice(adjf6) + def v1(): + v1 = list() + v1 = lines[23] + v1 = v1.strip() + v1 = v1.split(' ') + v1.remove(v1[0]) + return random.choice(v1) + def v2(): + v2 = list() + v2 = lines[24] + v2 = v2.strip() + v2 = v2.split(' ') + v2.remove(v2[0]) + return random.choice(v2) + def v3(): + v3 = list() + v3 = lines[25] + v3 = v3.strip() + v3 = v3.split(' ') + v3.remove(v3[0]) + return random.choice(v3) + def v4(): + v4 = list() + v4 = lines[26] + v4 = v4.strip() + v4 = v4.split(' ') + v4.remove(v4[0]) + return random.choice(v4) + def v5(): + v5 = list() + v5 = lines[27] + v5 = v5.strip() + v5 = v5.split(' ') + v5.remove(v5[0]) + return random.choice(v5) + def v6(): + v6 = list() + v6 = lines[28] + v6 = v6.strip() + v6 = v6.split(' ') + v6.remove(v6[0]) + return random.choice(v6) + def partm3(): + partm3 = list() + partm3 = lines[29] + partm3 = partm3.strip() + partm3 = partm3.split(' ') + partm3.remove(partm3[0]) + return random.choice(partm3) + def partm4(): + partm4 = list() + partm4 = lines[30] + partm4 = partm4.strip() + partm4 = partm4.split(' ') + partm4.remove(partm4[0]) + return random.choice(partm4) + def partf4(): + partf4 = list() + partf4 = lines[31] + partf4 = partf4.strip() + partf4 = partf4.split(' ') + partf4.remove(partf4[0]) + return random.choice(partf4) 
# ---------------------------------------------------------------------------
# Poem generator (continuation).
#
# Relies on names created earlier in the file: the ``random`` module, the
# ``lines`` table and the pickers nm1..nm6, nf1..nf5, adjm1..adjm6,
# adjf2..adjf6, v1..v6, partm3, partm4 and partf4.
# NOTE(review): assumes ``lines`` is readable as a module-level name (e.g. it
# was filled inside a ``with open(...)`` block of a script) - confirm.
# ---------------------------------------------------------------------------
def _pick_word(row):
    """Return a random word from row ``row`` of the ``lines`` table.

    The row is stripped and split on single spaces; its first token is
    dropped before choosing (presumably a row label - confirm against the
    data file).

    Raises:
        IndexError: if the row does not exist or has nothing after its
            first token.
    """
    tokens = lines[row].strip().split(' ')
    return random.choice(tokens[1:])


def partm5():
    """Random word from row 32 of ``lines``."""
    return _pick_word(32)


def partf5():
    """Random word from row 33 of ``lines``."""
    return _pick_word(33)


def partm6():
    """Random word from row 34 of ``lines``."""
    return _pick_word(34)


def partf6():
    """Random word from row 35 of ``lines``."""
    return _pick_word(35)


def conj1():
    """Random word from row 36 of ``lines``."""
    return _pick_word(36)


def conj2():
    """Random word from row 37 of ``lines``."""
    return _pick_word(37)


def numm2():
    """Random word from row 38 of ``lines``."""
    return _pick_word(38)


def numf2():
    """Random word from row 39 of ``lines``."""
    return _pick_word(39)


def numm3():
    """Random word from row 40 of ``lines``."""
    return _pick_word(40)


def numf3():
    """Random word from row 41 of ``lines``."""
    return _pick_word(41)


def numm4():
    """Random word from row 42 of ``lines``."""
    return _pick_word(42)


def numf4():
    """Random word from row 43 of ``lines``."""
    return _pick_word(43)


def numm5():
    """Random word from row 44 of ``lines``."""
    return _pick_word(44)


def numf5():
    """Random word from row 45 of ``lines``."""
    return _pick_word(45)


def adv2():
    """Random word from row 46 of ``lines``."""
    return _pick_word(46)


def adv3():
    """Random word from row 47 of ``lines``."""
    return _pick_word(47)


def adv4():
    """Random word from row 48 of ``lines``."""
    return _pick_word(48)


def adv5():
    """Random word from row 49 of ``lines``."""
    return _pick_word(49)


def adv6():
    """Random word from row 50 of ``lines``."""
    return _pick_word(50)


def _compose(templates):
    """Choose one template and join the words its pickers return.

    ``templates`` is a sequence of tuples of zero-argument picker functions.
    Only the chosen template's pickers are called, so a line no longer
    requires every candidate phrase to be generated first.
    """
    chosen = random.choice(templates)
    return ' '.join(pick() for pick in chosen)


def random_line_5_1():
    """Return a random phrase for the first kind of 5-line.

    Templates that appear twice are kept twice on purpose: the original list
    contained them twice, which doubles their probability.
    """
    return _compose((
        (adjm4, nm1), (adjm3, nm2), (adjm2, nm3), (adjm4, nm1),
        # NOTE(review): the original paired adjf2 with nf2. In every other
        # template of this function the numeric suffixes add up to 5 (and the
        # masculine twin is adjm2 + nm3), so nf2 looks like a typo for nf3.
        (adjf4, nf1), (adjf3, nf2), (adjf2, nf3), (adjf4, nf1),
        (partm3, nm2), (partm4, nm1), (partf4, nf1),
        (nm5,), (nf5,),
        (numm2, adjm1, nm2), (numm2, adjm2, nm1),
        (numm3, adjm1, nm1), (numm4, nm1),
        (numf2, adjf2, nf1), (numf2, nf3), (numf3, nf2),
    ))


def random_line_7_1():
    """Return a random adjective/participle + noun phrase for a 7-line."""
    return _compose((
        (adjm6, nm1), (adjm5, nm2), (adjm4, nm3), (adjm3, nm4),
        (adjm2, nm5), (adjm1, nm6),
        (adjf6, nf1), (adjf5, nf2), (adjf4, nf3), (adjf3, nf4),
        (adjf2, nf5),
        (partm6, nm1), (partm5, nm2), (partm4, nm3), (partm3, nm4),
        (partf6, nf1), (partf5, nf2), (partf4, nf3),
    ))


def random_line_7_2():
    """Return a random verb phrase ending in a ``conj1`` word for a 7-line."""
    return _compose((
        (v6, conj1),
        (adv2, v4, conj1),
        (adv3, v3, conj1),
    ))


def random_line_5_2():
    """Return a random (adverb +) verb phrase for a 5-line."""
    return _compose((
        (v5,),
        (adv2, v3),
        (adv3, v2),
        (adv4, v1),
    ))


def random_line_7_3():
    """Return a random adverb + verb phrase for a 7-line."""
    return _compose((
        (adv2, v5),
        (adv3, v4),
        (adv4, v3),
        (adv5, v2),
        (adv6, v1),
    ))


def poem():
    """Return a random three-line poem (no punctuation after the last line).

    One of four layouts is chosen; each layout fixes which line builder is
    used for every line and which separator follows the first two lines.
    """
    layouts = (
        (random_line_5_1, '.\n', random_line_7_1, '.\n', random_line_5_1),
        (random_line_5_1, '\n', random_line_7_2, '\n', random_line_5_2),
        (random_line_5_1, '\n', random_line_7_3, '.\n', random_line_5_1),
        (random_line_5_1, '\n', random_line_7_3, ',\n', random_line_5_2),
    )
    first, sep_one, middle, sep_two, last = random.choice(layouts)
    return first() + sep_one + middle() + sep_two + last()


print(poem() + '.')


# ---------------------------------------------------------------------------
# Script: word count and frequency dictionary of a text file.
# ---------------------------------------------------------------------------
import string
from collections import Counter

# NOTE(review): the original strip() argument is cut off after '?!@' in this
# copy of the file; ASCII punctuation plus common Russian typographic marks is
# used as a replacement - confirm the intended character set.
PUNCTUATION = string.punctuation + '«»—…'

fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased tokens of the UTF-8 text file *fname*.

    Tokens are separated by any whitespace run; the original split on single
    spaces only, which glued words across line breaks and produced empty
    tokens for repeated spaces.
    """
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read().lower().split()


def count_words(fname):
    """Return how many tokens of *fname* are non-empty once punctuation is stripped.

    The original stripped each token but discarded the result, so tokens made
    only of punctuation were counted as words.
    """
    return sum(1 for word in openfile(fname) if word.strip(PUNCTUATION))


def dicff(fname):
    """Return a dict mapping every token of *fname* to its frequency.

    Keys are inserted in sorted order, as in the original; punctuation is not
    stripped here (the original did not strip it either).
    """
    return dict(Counter(sorted(openfile(fname))))


print(count_words(fname), dicff(fname))


# ---------------------------------------------------------------------------
# Script: find forms of a verb in a text file.
# ---------------------------------------------------------------------------
import re
import string

# NOTE(review): replacement for a strip() argument truncated after '?!@'.
PUNCTUATION = string.punctuation + '«»—…'

fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased, whitespace-separated tokens of *fname* (UTF-8)."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read().lower().split()


def words(fname):
    """Return the tokens of *fname* with surrounding punctuation stripped."""
    return [word.strip(PUNCTUATION) for word in openfile(fname)]


regex = r'\bоткр(ыл[аи]?|о(ют?|е(шь|т|м|те))|ыть)\b'


def formsearch(regex):
    """Return the words of the global ``fname`` that match *regex*, one per line.

    Matching uses ``re.search`` on each stripped word; an empty string is
    returned when nothing matches.
    """
    pattern = re.compile(regex)
    return '\n'.join(word for word in words(fname) if pattern.search(word))


print(formsearch(regex))


# ---------------------------------------------------------------------------
# Script: list "-ing" words and count one of them.
# ---------------------------------------------------------------------------
import string

# NOTE(review): replacement for a strip() argument truncated after '?!@'.
PUNCTUATION = string.punctuation + '«»—…'

fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased, whitespace-separated tokens of *fname* (UTF-8)."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read().lower().split()


def ingform(fname):
    """Return, in text order, every punctuation-stripped token ending in 'ing'."""
    stripped = (word.strip(PUNCTUATION) for word in openfile(fname))
    return [word for word in stripped if word.endswith('ing')]


theword = input('Введите слово: ')


def searching(theword):
    """Return how many times *theword* occurs among the '-ing' words of ``fname``."""
    return ingform(fname).count(theword)


print(ingform(fname))
print(searching(theword))


# ---------------------------------------------------------------------------
# Script: extract country and capital from a saved HTML page.
#
# NOTE(review): in this copy of the file the HTML markup inside the regular
# expressions is missing (the infobox pattern is empty, the capital pattern
# has two groups but group 3 is read, the country capture is a trailing lazy
# group that can only match ''). The patterns below are a best-effort
# reconstruction for a Wikipedia-style page and must be checked against the
# real input.
# ---------------------------------------------------------------------------
import re

_INFOBOX_RE = re.compile(r'<table[^>]*class="[^"]*infobox')
_CAPITAL_RE = re.compile(
    r'data-wikidata-property-id="P36"(.*?)<a (.*?)>(.*?)</a>', re.DOTALL)
_HEADING_RE = re.compile(r'<h1[^>]*>(.*?)</h1>', re.DOTALL)
_ANY_TAG_RE = re.compile(r'<[^>]+>')

fname = input('Введите название файла: ')


def open_html(fname):
    """Return the full contents of the UTF-8 file *fname*."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read()


def find_capital(fname):
    """Return the text of the first link after the ``P36`` marker, or None.

    None is returned when the page has no infobox table or no such link.
    """
    text = open_html(fname)
    if _INFOBOX_RE.search(text) is None:
        return None
    capital = _CAPITAL_RE.search(text)
    if capital is None:
        return None
    return capital.group(3)


def find_country(fname):
    """Return the page's first ``<h1>`` text with nested tags removed, or None.

    None is returned when the page has no infobox table or no heading.
    """
    text = open_html(fname)
    if _INFOBOX_RE.search(text) is None:
        return None
    heading = _HEADING_RE.search(text)
    if heading is None:
        return None
    return _ANY_TAG_RE.sub('', heading.group(1)).strip()


print('Страна: ', find_country(fname), 'Столица: ', find_capital(fname))


# ---------------------------------------------------------------------------
# Script: strip HTML and replace forms of one noun with another.
# ---------------------------------------------------------------------------
import re

fname = input('Введите название файла: ')


def open_html(fname):
    """Return the text of the HTML file *fname* with markup removed.

    Comments and <script>/<style> elements are dropped first, then every
    remaining tag (tags may span several lines). The original ran three more
    substitutions after the tag removal, but their patterns are empty in this
    copy of the file and they could not match once all tags were gone.
    """
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    text = re.sub(r'<!--.*?-->', '', text, flags=re.DOTALL)
    text = re.sub(r'<(script|style)\b.*?</\1\s*>', '', text,
                  flags=re.DOTALL | re.IGNORECASE)
    return re.sub(r'<[^>]*>', '', text)


def changeform(fname):
    """Replace the noun's forms in *fname* and write the text to results.txt.

    The case ending captured by the pattern is carried over to the
    replacement. Word boundaries keep derived words (e.g. 'комариный') from
    being rewritten, which the unanchored original did.

    Returns:
        The fixed completion message.
    """
    endings = r'(у|е|ы|а(?:ми|м|х)?|о(?:м|в))?'
    text = open_html(fname)
    text = re.sub(r'\bкомар' + endings + r'\b', 'слон\\1', text)
    text = re.sub(r'\bКомар' + endings + r'\b', 'Слон\\1', text)
    with open('results.txt', 'w', encoding='utf-8') as n:
        n.write(text)
    return 'Готово! Результаты в файле results.txt .'


print(changeform(fname))


# ---------------------------------------------------------------------------
# Script: tf-idf keywords for every file under ./wikipedia.
# ---------------------------------------------------------------------------
import os
import re
from collections import Counter
from math import log

# NOTE(review): the original character class is cut off after '%' in this
# copy of the file; it is closed here with '#' added - confirm the full set.
punct = r'[.,!«»?&@"$\[\]\(\):;%#]'
tabs = '[\t\n]'


def preprocessing(text):
    """Lower-case *text*, delete ``punct`` characters and return its tokens."""
    cleaned = re.sub(punct, '', text.lower())
    return re.sub(tabs, ' ', cleaned).split()


def count_tf(word, text):
    """Return the share of tokens in *text* equal to *word* (0.0 if *text* is empty)."""
    if not text:
        return 0.0
    return text.count(word) / len(text)


def count_df(word, texts):
    """Return the number of collections in *texts* that contain *word*."""
    return sum(1 for text in texts if word in text)


def count_idf(word, texts):
    """Return ``len(texts) / df`` for *word*, or 0 when no text contains it."""
    df = count_df(word, texts)
    if df == 0:
        return 0
    return len(texts) / df


def _tfidf_score(tf, idf):
    """Combine a term frequency and an inverse document ratio into one score."""
    if tf == 0 or idf == 0:
        # log(0) is undefined; an absent word carries no weight.
        return 0.0
    return tf * log(idf, 10)


def count_tfidf(word, text, texts):
    """Return ``tf * log10(N / df)`` for *word* in *text* against *texts*.

    The original multiplied log10(tf) by log10(idf), which is never positive,
    reverses the ranking and raises ValueError when either value is 0.
    """
    return _tfidf_score(count_tf(word, text), count_idf(word, texts))


def keywords(text, texts, top_n=6):
    """Return the *top_n* highest-scoring words of *text* with their tf-idf.

    Args:
        text: token list of the document being described.
        texts: token collections of the whole corpus.
        top_n: number of keywords to keep; 6 is what the original loop
            (``if i > 5: break``) produced.

    Returns:
        A dict ordered from the highest score down; ties keep the order of
        first occurrence in *text*.
    """
    if not text:
        return {}
    counts = Counter(text)
    # Sets make each membership test O(1); counting once avoids rescanning
    # the document for every distinct word.
    vocabularies = [set(doc) for doc in texts]
    total = len(text)
    scores = {
        word: _tfidf_score(count / total, count_idf(word, vocabularies))
        for word, count in counts.items()
    }
    best = sorted(scores, key=scores.get, reverse=True)[:top_n]
    return {word: scores[word] for word in best}


def main():
    """Print the keywords of every file found under the 'wikipedia' directory.

    Texts are keyed by bare file name, so files with the same name in
    different sub-directories overwrite each other (unchanged from the
    original).
    """
    texts = {}
    for root, dirs, files in os.walk('wikipedia'):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
                texts[f] = preprocessing(t.read())
    raw_texts = list(texts.values())
    for name, tokens in texts.items():
        print('\nИзвлекаем ключевые слова для текста {}'.format(name))
        for key, score in keywords(tokens, raw_texts).items():
            print(key, score)


if __name__ == '__main__':
    main()