'))
+def find_author(text):
+ text_as_string = open_file_as_string(text)
+ author = re.search('')
+ author = auth.lstrip('')
+ return author
+def find_created(text):
+ text_as_string = open_file_as_string(text)
+ created = re.search('')
+ created = auth.lstrip('')
+ return created
+def main():
+ filetree = os.walk('news')
+ task1 = open('task1.txt', 'w', encoding = 'cp1251')
+ for root, dirs, files in filetree:
+ for f in files:
+ task1.write(f + '\t' + count_words(f) + '\n')
+ task1.close()
+ task2 = open('task2.csv', 'w', encoding = 'cp1251')
+ writer = csv.writer(task2.csv, delimiter = '|', quotechar='|', quoting=csv.QUOTE_MINIMAL)
+ for root, dirs, files in filetree:
+ for f in files:
+ f.writerow([f] + [find_author(f)] + [find_created(f)])
+if __name__ == '__main__':
+ main()
+import re
+import os
+import csv
def open_file(xml):
    """Read a cp1251-encoded text file and return its lines.

    Args:
        xml: Path of the file to read.

    Returns:
        A list with one string per line; each keeps its trailing newline.
    """
    with open(xml, 'r', encoding='cp1251') as f:
        return f.readlines()
def open_file_as_string(xml):
    """Read a cp1251-encoded text file and return its whole content.

    Args:
        xml: Path of the file to read.

    Returns:
        The file content as a single string.
    """
    with open(xml, 'r', encoding='cp1251') as f:
        return f.read()
+def count_words(text):
+ text_as_string = open_file_as_string(text)
+ return str(text_as_string.count(''))
+def find_author(text):
+ text_as_string = open_file_as_string(text)
+ author = re.search('')
+ author = auth.lstrip('')
+ return author
+def find_created(text):
+ text_as_string = open_file_as_string(text)
+ created = re.search('')
+ created = auth.lstrip('')
+ return created
def main():
    """Write per-file statistics for every file found under ``news``.

    ``task1.txt`` receives one tab-separated ``name, count`` line per file
    (the count comes from ``count_words``).  ``task2.csv`` receives one
    ``name|author|created`` row per file.
    """
    # os.walk() is a one-shot generator, so collect the files once and reuse
    # the list for both output files.  The helpers open the file themselves,
    # so they need the full path, not just the bare name.
    found = [
        (name, os.path.join(root, name))
        for root, dirs, files in os.walk('news')
        for name in files
    ]

    with open('task1.txt', 'w', encoding='cp1251') as task1:
        for name, path in found:
            task1.write(name + '\t' + count_words(path) + '\n')

    # newline='' is what the csv module expects from the file it writes to.
    with open('task2.csv', 'w', encoding='cp1251', newline='') as task2:
        # The quote character must differ from the delimiter; recent Python
        # versions reject a dialect that uses '|' for both.
        writer = csv.writer(task2, delimiter='|', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        for name, path in found:
            writer.writerow([name, find_author(path), find_created(path)])
+if __name__ == '__main__':
+ main()
+import os
+filetree = os.walk('news')
+for root, dirs, files in filetree:
+ for f in files:
+ print(f)
def new_sentence(sentence):
    """Rebuild a sentence from its words with punctuation removed.

    Args:
        sentence: Iterable of word strings.

    Returns:
        Every word stripped of leading/trailing ``.,;:?!`` and followed by a
        space, with a single '.' appended at the end.
    """
    return ''.join(word.strip('.,;:?!') + ' ' for word in sentence) + '.'
def text_process(text_name):
    """Split a UTF-8 text file into normalised sentences.

    '!', '?' and '...' are treated as sentence ends just like '.'.

    Args:
        text_name: Path of the text file.

    Returns:
        A list of sentences, each rebuilt by ``new_sentence`` from the
        whitespace-separated words of one sentence.  Empty sentences are
        dropped.
    """
    with open(text_name, 'r', encoding='utf-8') as f:
        text = f.read()
    for mark in ('!', '?', '...'):
        text = text.replace(mark, '.')
    # Split every sentence into words first: new_sentence() expects words,
    # not the characters of a string.
    sentences = (chunk.split() for chunk in text.split('.'))
    return [new_sentence(words) for words in sentences if words]
def create_dict(text):
    """Map every sentence to a ``{word: length}`` dictionary of its words.

    Args:
        text: Iterable of sentence strings.

    Returns:
        A dict keyed by sentence; each value maps the sentence's
        whitespace-separated words to their lengths.  A trailing '.' on the
        sentence is not counted as a word.
    """
    return {
        sentence: {word: len(word) for word in sentence.rstrip('.').split()}
        for sentence in text
    }
+def main():
+ text = text_process('text.txt')
+ return(create_dict(text))
+main()
+import random
+n = open('nouns.txt', 'r')
+nouns = [line.strip() for line in n]
+v = open('verbs.txt', 'r')
+verbs = [line.strip() for line in v]
+c = open('clitics.txt', 'r')
+clitics = [line.strip() for line in c]
+n2 = open('nouns2.txt', 'r')
+nouns2 = [line.strip() for line in n2]
+p = open('marks.txt', 'r')
+punctuation = [line.strip() for line in p]
+i = open('imperatives.txt', 'r')
+imperative = [line.strip() for line in i]
def verse1():
    """Return a line of the form: noun verb noun punctuation."""
    return ' '.join((random.choice(nouns), random.choice(verbs),
                     random.choice(nouns), random.choice(punctuation)))


def verse2():
    """Return a line of the form: imperative noun clitic noun2 punctuation."""
    return ' '.join((random.choice(imperative), random.choice(nouns),
                     random.choice(clitics), random.choice(nouns2),
                     random.choice(punctuation)))


def verse3():
    """Return a line of the form: clitic noun2 verb noun punctuation."""
    return ' '.join((random.choice(clitics), random.choice(nouns2),
                     random.choice(verbs), random.choice(nouns),
                     random.choice(punctuation)))


def make_verse():
    """Return one line built by a randomly chosen verse template."""
    return random.choice((verse1, verse2, verse3))()
+for n in range(4):
+ print(make_verse)import os
def symbols(s):
    """Tell whether *s* consists of basic Latin letters only.

    Args:
        s: String to check.

    Returns:
        True when every character is in A-Z or a-z (also for an empty
        string), otherwise False.
    """
    latin = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    return all(ch in latin for ch in s)
def main():
    """Print the current-directory entries named with Latin letters only.

    Each matching name is printed, followed by the number of matches.
    """
    n = 0
    for f in os.listdir('.'):
        if symbols(f):
            n += 1
            print(f)
    print(n)
+main()
+s = input()
+l = []
+while s != '':
+ if len(s) > 5:
+ l.append(s)
+ s = input()
+for i in(l):
+ print(i)s = input()
+for i in range(0, len(s) + 1):
+ print(s[0:i])def text_process(text_name):
+ f = open(text_name, 'r', encoding='utf-8')
+ text = f.read()
+ l = text.split()
+ l1 = []
+ for word in l:
+ l1.append(word.strip('.,;:?![]{}'))
+ f.close()
+ return l1
def count_ness(text):
    """Collect the words that end in "ness".

    Args:
        text: Iterable of word strings.

    Returns:
        A list of the matching words in their original order (duplicates
        are kept).
    """
    return [word for word in text if word.endswith('ness')]
def frequency(word, text):
    """Count how many items of *text* are equal to *word*.

    Args:
        word: Value to look for.
        text: Iterable of items to compare against.

    Returns:
        The number of equal items as an int.
    """
    return sum(1 for item in text if item == word)
def main():
    """Report statistics on the "-ness" words of ``text.txt``.

    Prints the number of "-ness" word occurrences and then the highest
    frequency among them (0 when there are none).
    """
    text = text_process('text.txt')
    ness_words = count_ness(text)
    words = {word: frequency(word, text) for word in ness_words}
    print(len(ness_words))
    # default=0 keeps an input without "-ness" words from raising ValueError.
    print(max(words.values(), default=0))
+main()import re
+def count_line():
+ with open('Test.xml', 'r', encoding = 'utf-8') as f:
+ s = 1
+ for line in f:
+ if line != ' \n':
+ s += 1
+ else:
+ break
+ return s
+def write_in():
+ with open('Test.txt', 'w', encoding = 'utf-8') as f:
+ num = count_line()
+ f.write(str(num))
+ return
+write_in()
+def open_text():
+ with open('Test.xml', 'r', encoding = 'utf-8') as f:
+ text = f.read()
+ return text
+def phrase():
+ text = open_text()
+ d = {}
+ reg = re.findall(r'.*?',text)
+ for i in range(len(reg)):
+ if reg[i] not in d:
+ d[reg[i]] = 1
+ else:
+ d[reg[i]] +=1
+ return d
+def write_phrase():
+ with open('Test1.txt', 'w', encoding = 'utf-8') as f:
+ d = phrase()
+ for key in d:
+ f.write(key + ',' + str(d[key])+ '\n')
+ return
+write_phrase()
+def n():
+ text = open_text()
+ reg = re.findall(r'(.*?)',text)
+ return reg
+print(n())
+word = input('Введите слово')
+for i in range(len(word)):
+ print(word[i::] + word [:i])
+print('Введите число')
+a=float(input())
+print('Введите число')
+b=float (input())
+print('Введите число')
+c=float(input())
+if a%b==c:
+ print ('a даёт остаток c при делении на b')
+else:
+ print('a не даёт остаток c при делении на b')
+if a*c+b==0:
+ print ('c является решением линейного уравнения ax + b = 0')
+else:
+ print('c не является решением линейного уравнения ax + b = 0')
+import os
def files():
    """Find the most common file extension below the current directory.

    The extension is the part of the name after the last '.'; a name
    without a dot counts as its own "extension".

    Returns:
        The most frequent extension (the first one encountered on a tie),
        or None when the tree contains no files.
    """
    counts = {}
    for root, dirs, names in os.walk('.'):
        for name in names:
            ext = name[name.rfind('.') + 1:]
            counts[ext] = counts.get(ext, 0) + 1
    if not counts:
        return None
    # max() returns the first maximal key in insertion order.
    return max(counts, key=counts.get)
+print(files())
+import random
def _random_word(path):
    """Return a random whitespace-separated word from the UTF-8 file *path*.

    Words from every line of the file are candidates.

    Raises:
        IndexError: If the file contains no words.
    """
    with open(path, 'r', encoding='utf-8') as file:
        words = file.read().split()
    return random.choice(words)


def noun_f():
    """Return a random word from 'Существительные_ж.txt'."""
    return _random_word('Существительные_ж.txt')


def noun_m():
    """Return a random word from 'Существительные_м.txt'."""
    return _random_word('Существительные_м.txt')


def noun_number_of():
    """Return a random word from 'Существительные_множественные.txt'."""
    return _random_word('Существительные_множественные.txt')


def adjective_m(word):
    """Return a random word from 'Прилагательные_м.txt' followed by *word*."""
    return _random_word('Прилагательные_м.txt') + ' ' + word


def adverb():
    """Return a random word from 'Наречия.txt'."""
    return _random_word('Наречия.txt')


def verb_f(subj):
    """Return a random word from 'Глаголы_ж.txt' followed by *subj*."""
    return _random_word('Глаголы_ж.txt') + ' ' + subj


def verb_m(adv, n):
    """Return *adv*, a space, *n*, a random word from 'Глаголы_м.txt', a space.

    *n* is inserted directly before the verb without a separator, so it has
    to bring its own trailing space when one is wanted.
    """
    return adv + ' ' + n + _random_word('Глаголы_м.txt') + ' '


def verb_inf():
    """Return a random word from 'Глаголы_инф.txt'."""
    return _random_word('Глаголы_инф.txt')


def verb_transitive(obj):
    """Return ', который ' + a random word from 'Глаголы_переход.txt' + *obj*."""
    return ', который ' + _random_word('Глаголы_переход.txt') + ' ' + obj


def verb_imp():
    """Return a random word from 'Глаголы_пов.txt'."""
    return _random_word('Глаголы_пов.txt')


def time():
    """Return a random word from 'Время.txt'."""
    return _random_word('Время.txt')


def pronoun():
    """Return a random word from 'Местоимения.txt'."""
    return _random_word('Местоимения.txt')
def no():
    """Return either the particle 'не ' (with trailing space) or ''."""
    return random.choice(['не ', ''])
+def random_sentence1():
+ sentence = 'Иди и ' + verb_imp() + ' мне ' + noun_m()+'а' + '!'
+ return sentence
+def random_sentence2():
+ sentence = adjective_m(noun_m()) + verb_transitive(noun_number_of())+ ',' +\
+ verb_m(adverb(), no()) + verb_inf() + '.'
+ return sentence
+def random_sentence3():
+ sentence = 'Где ' + time() + ' ' + verb_f(noun_f()) + '?'
+ return sentence
+def random_sentence4():
+ sentence = 'Если б ' + pronoun() + ' был ' + noun_m()+ ', то ' +\
+ verb_m(adverb(), no())+ ' бы ' + verb_inf() + '.'
+ return sentence
def random_text():
    """Return one sentence built by a randomly chosen sentence template."""
    # Pick the template first and build only that sentence; building all
    # four would read every word file just to throw three results away.
    builder = random.choice([random_sentence1, random_sentence2,
                             random_sentence3, random_sentence4])
    return builder()
+print("---- FASCINATING MASTERPIECE STARTS HERE ----")
+num_of_sents = 5
+for i in range(num_of_sents):
+ sentence = random_text()
+ sentence = sentence.capitalize()
+ print(sentence, end=' ')
+print("\n---------AND ENDS HERE ---------")
+print ('Введите слово')
+word = input()
+for letter in word[::-1]:
+ if letter not in 'з,я':
+ print (letter)
+ if letter in 'з,я':
+ continue
+ print (letter)
+import re
+import os
def text_read():
    """Read an ``.xml`` file found below the current directory.

    Returns:
        The UTF-8 content of the last ``.xml`` file that ``os.walk('.')``
        yields, or an empty string when there is none.
    """
    content = ''
    for root, dirs, files in os.walk('.'):
        for f in files:
            if f.endswith('.xml'):
                # Join with root: a bare name only resolves for files that
                # sit directly in the current directory.
                with open(os.path.join(root, f), 'r', encoding='utf-8') as xml:
                    content = xml.read()
    return content
+def count():
+ text = text_read()
+ reg1 = re.findall(r'.*', text)
+ num = len(reg1)/len(reg2)
+ return num
+print(count())
def part_of_speech():
    """Count the upper-case tags that follow ``gr="`` in the XML text.

    Returns:
        A dict mapping each captured tag (a run of A-Z, possibly empty) to
        the number of times it occurs in the text from ``text_read()``.
    """
    text = text_read()
    counts = {}
    for tag in re.findall(r'gr="([A-Z]*)', text):
        counts[tag] = counts.get(tag, 0) + 1
    return counts
+print(part_of_speech())
+def write_in():
+ with open('Test1.txt', 'w', encoding = 'utf-8') as f:
+ d = part_of_speech()
+ template = '{}{:>10}'
+ for key in sorted(d):
+ f.write((template.format(key, d[key]))+ '\n')
+ return
+
+def write():
+ with open('Test1.txt', 'w', encoding = 'utf-8') as f:
+ d = part_of_speech()
+ for key in sorted(d):
+ f.write(key+'\t'+str(d[key])+ '\n')
+ return
+write()
+import re
+def open_text():
+ with open('Programming.txt', 'r', encoding = 'utf - 8') as f:
+ text = f.read()
+ text = text.lower()
+ arr = text.split()
+ for i, w in enumerate(arr):
+ arr[i] = arr[i].strip(',.?!-')
+ return arr
def prog():
    """List the distinct forms of "программировать" found in the text.

    Returns:
        The matching words from ``open_text()``, de-duplicated in order of
        first appearance and joined with ', '.
    """
    arr = open_text()
    regex = r'\bпрограммир(ова(ть(ся)?|нн(ым|о(е|го|му?))|вш(ая|ую|и(е|й|ми?|х)|е(й|е|му?|го))(ся)?|в|л([иа]?(сь)?)|(ся)?)|у((я(сь)?|ем(о(е|го|й|му?)|ы(е|й|х|ми?)|ая|ую)|ю(щ(ая|ую|и(е|й|х|ми?)|е(го|й|му?))(ся)?))|ют(ся)?|е((шь|т|ем)(ся)?)|ю(сь)?|ете(сь)?))\b'
    matches = [word for word in arr if re.search(regex, word) is not None]
    # dict.fromkeys() drops duplicates while keeping first-seen order.
    return ', '.join(map(str, dict.fromkeys(matches)))
+print(prog())
+import re
+def open_s():
+ with open ('Высшая школа экономики — Википедия.html','r', encoding = 'utf - 8')as f:
+ content = f.read()
+ links = r'Преподаватели | \n\n (.*?)(.*?) '
+ m = re.search(links,content)
+ if m != None:
+ return m.group(3)
+def result():
+ with open ('Результат.txt', 'w', encoding = 'utf - 8')as file:
+ result = open_s()
+ return file.write('Преподаватели:'+ result)
+open_s()
+result()
+import re
+def open_text():
+ with open('Leskov.txt', 'r', encoding = 'utf - 8') as f:
+ text = f.read()
+ text = text.lower()
+ arr = text.split()
+ for i, w in enumerate(arr):
+ arr[i] = arr[i].strip(',.”"?!-:;')
+ return arr
+def words():
+ arr = open_text()
+ return len(arr)
+print(words())
+def frequency():
+ arr = open_text()
+ d = {}
+ for i in range(len(arr)):
+ if arr[i] not in d:
+ d[arr[i]] = 1
+ else:
+ d[arr[i]] +=1
+ return d
+def result():
+ with open ('Результат.csv', 'w', encoding = 'utf - 8')as file:
+ d = frequency()
+ for key in sorted(d):
+ file.write(key + ',' + str(d[key])+ '\n')
+ return
+result()
def phrase():
    """Find "...аго" + following-word phrases in ``Leskov.txt``.

    Every match is also written to ``Результат.txt``, one per line.

    Returns:
        The list of matched phrases in text order.
    """
    with open('Leskov.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    reg = re.findall(r'\b\w*аго \w*(?:и|а|ы)', text)
    with open('Результат.txt', 'w', encoding='utf-8') as file:
        for found in reg:
            file.write(found + '\n')
    return reg
+phrase()
+def puzzle():
+ phrase = {'незванный':'гость','розовый':'слон','вишнёвый':'сад', 'сиреневый':'туман', 'кленовый':'лист'}
+ for key in phrase:
+ for i in range(len(key)):
+ print(key + '...')
+ w = input('Я загадал слово ')
+ if w == phrase[key]:
+ return print('Ты выиграл')
+ else:
+ print ('Ты проиграл')
+ return
+puzzle()
def open_text():
    """Load the adjective -> noun pairs from ``Книга1.csv``.

    The first line holds the ';'-separated keys and the second line the
    ';'-separated values.

    Returns:
        A dict pairing the keys with the values position by position.
    """
    with open('Книга1.csv', 'r', encoding='utf-8') as f:
        keys = f.readline().rstrip('\n').split(';')
        # Strip the newline here as well, otherwise the last value never
        # compares equal to what the user types.
        values = f.readline().rstrip('\n').split(';')
    return dict(zip(keys, values))
+def puzzle():
+ phrase = open_text()
+ for key in phrase:
+ for i in range(len(key)):
+ print(key + '...')
+ w = input('Я загадал слово ')
+ if w == phrase[key]:
+ return print('Ты выиграл')
+ else:
+ print ('Ты проиграл')
+ return
+puzzle()
+import re
+def open_s():
+ with open ('Лингвистика — Википедия.html','r', encoding = 'utf - 8')as f:
+ text = f.read()
+ return text
def lang_meat():
    """Replace every form of "язык" with the same form of "шашлык".

    Returns:
        The text from ``open_s()`` with lower-case and capitalised forms
        substituted; the original case ending is kept.
    """
    m = open_s()

    def swap(stem):
        # group(1) is None when the word has no ending.
        return lambda mo: stem + (mo.group(1) or '')

    # The replacement must not contain r'\b': in a replacement template
    # that escape is a backspace character, not a word boundary.
    step = re.sub(r'\bязык(а(ми?|х)?|у|о(м|в)|е|и)?\b', swap('шашлык'), m,
                  flags=re.DOTALL)
    step2 = re.sub(r'\bЯзык(а(ми?|х)?|у|о(м|в)|е|и)?\b', swap('Шашлык'), step,
                   flags=re.DOTALL)
    return step2
+def result():
+ with open ('Результат.txt', 'w', encoding = 'utf - 8')as file:
+ result = lang_meat()
+ return file.write(result)
+result()
+first = 0
+second = 0
+f = open( "Капибара.txt", "r", encoding = "utf-8")
+for line in f:
+ arr = line.split()
+ for i in arr:
+ if len(i) == 3 and i[len(i)-1] != ',' and i[len(i)-1] != '.' and i[len(i)-1] != ':' and i[len(i)-1] != ';' and i[len(i)-1] != '!' and i[len(i)-1] != '?':
+ first += 1
+ if len(i) == 4 and i[len(i)-1] == ',' for i[len(i)-1] == '.' or i[len(i)-1] == ':' or i[len(i)-1] == ';' or i[len(i)-1] == '!' or i[len(i)-1] == '?':
+ first += 1
+ if len(i) == 1 and i != '―':
+ second += 1
+ if len(i) == 2 and i[len(i)-1] == ',' or i[len(i)-1] == '.' or i[len(i)-1] == ':' or i[len(i)-1] == ';' or i[len(i)-1] == '!' or i[len(i)-1] == '?':
+ second +=1
+if second == 0:
+ print( 'Слов длины один нет')
+else:
+ num = first/second
+print(num)
+f.close()
+import re
+import os
+def folder():
+ arr = [f for f in os.listdir('.')if re.search(r'[а-яёЁА-Я]+',f)if os.path.isdir(f)]
+ print(len(arr))
+ return arr
+folder()
+def print_result():
+ result = []
+ for f in os.listdir('.'):
+ if os.path.isfile(f):
+ f = f[:f.rfind('.')]
+ if f not in result:
+ result.append(f)
+ else:
+ if f not in result:
+ result.append(f)
+ return ' '.join([str(i) for i in result])
+print(print_result())
+import os
+import re
+def text():
+ for root, dirs, files in os.walk('.'):
+ for f in files:
+ if f.endswith('.xhtml'):
+ with open( f, 'r') as text:
+ text = text.read()
+ reg = re.findall(r'', text)
+ with open('Exam.txt', 'w', encoding = 'utf-8') as f2:
+ f2.write(f +'\t'+str(len(reg))+ '\n')
+ return
+text()
+def table():
+ for f in os.listdir('.'):
+ with open( f, 'r') as text:
+ text = text.read()
+ reg1 = re.findall(r' ([А-Яа-яёЁ]*)\.', text)
+ reg2 = re.findall(r'([0-9]*)', text)
+ for i in reg1 and j in reg2:
+ with open ('Результат.csv', 'w', encoding = 'utf - 8')as file:
+ file.write( f + ',' + i + ',' + j+ ','+ '\n')
+ return
+table()
+n = 0
+f = open( "Цитаты.txt", "r", encoding = "utf-8")
+for line in f:
+ arr = line.split ('—')
+ arr2 = arr[0].split()
+ if len(arr2) < 10:
+ print (arr[0])
+arr3 = line.split()
+for i in arr3:
+ if i == 'разум':
+ n += 1
+print (n)
+f.close()
+import re
+def open_s():
+ with open ('Капибара — Википедия.html','r', encoding = 'utf - 8')as f:
+ text = f.read()
+ reg = r'(.*?)'
+ m = re.findall(reg,text)
+ return m
+print (open_s())
+for link in links [:10]:
+ print(link[0]
+for link in links[:10]:
+ print(link[2],'-->', link[1]
+import os
def delete(dirname):
    """Remove the directory *dirname* together with everything inside it.

    Nothing happens when *dirname* does not exist.

    Args:
        dirname: Path of the directory tree to remove.
    """
    # Walk bottom-up so that every directory is already empty when it is
    # removed; no recursion is needed on top of os.walk().
    for root, dirs, files in os.walk(dirname, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            path = os.path.join(root, name)
            # os.walk() does not descend into symlinked directories, so
            # unlink them here instead of following them.
            if os.path.islink(path):
                os.remove(path)
        os.rmdir(root)
+delete('кот')
+def print_tree(dirname, space = 0):
+ for root, dirs, files in os.walk(dirname):
+ print(''*root)
+ for i in files:
+ print(''*space,' **()'.format(i)
+ space += 2
+import os
+def task_0():
+ print(os.listdir('.'))
+task_0()
+def task_1():
+ sent = input('Введите предложение:')
+ arr = sent.split()
+ path = '\\'.join([str(i) for i in arr])
+ os.makedirs(path)
+task_1()
+def task_2():
+ n = int(input())
+ arr = []
+ for i in range(n):
+ arr.append(i)
+ path = '\\'.join([str(i) for i in arr])
+ os.makedirs(path)
+task_2()
def count_tf(word, text):
    """Return the term frequency of *word* in *text*.

    Args:
        word: Term to count.
        text: Sequence supporting ``count`` and ``len`` (e.g. a word list).

    Returns:
        ``text.count(word) / len(text)``, or 0.0 for an empty *text*.
    """
    if not text:
        return 0.0
    return text.count(word) / len(text)
def count_df(word, texts):
    """Return the number of texts in *texts* that contain *word*.

    Args:
        word: Term to look for.
        texts: Iterable of containers that support ``in``.
    """
    return sum(1 for text in texts if word in text)
def count_idf(word, texts):
    """Return the inverse document frequency ratio of *word*.

    Args:
        word: Term to look for.
        texts: Sized collection of documents.

    Returns:
        ``len(texts) / (1 + df)``, where df is the number of documents that
        contain *word*.  No logarithm is applied here.
    """
    return len(texts) / (1 + count_df(word, texts))
+from math import log
def count_tfidf(word, text, texts):
    """Return the score of *word* for *text* within the collection *texts*.

    The score is ``log10(tf) * log10(idf)``.
    NOTE(review): the usual TF-IDF definition is ``tf * log(idf)``; the
    formula is kept as written - confirm which one is intended.

    Returns:
        The score as a float; 0.0 when either factor is not positive (the
        logarithm would be undefined).
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    if tf <= 0 or idf <= 0:
        return 0.0
    return log(tf, 10) * log(idf, 10)
+import re
+punct = '[.,!«»?&@"$\[\]\(\):;%
+def preprocessing(text):
+ text_wo_punct = re.sub(punct, '', text.lower())
+ word = text_wo_punct.strip().split()
+ words = [i for i in word if len(i)>4 and re.search(r'[1-9]+', i) is None]
+ return words
+import os
+texts_dic = {}
+for root, dirs, files in os.walk('wikipedia'):
+ for f in files[:50]:
+ with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
+ text = preprocessing(t.read())
+ texts_dic[f.split('.')[0]] = text
+texts = list(texts_dic.values())
+for text in texts_dic:
+ print("Top words in document {}".format(text))
+ scores = {}
+ for word in texts_dic[text]:
+ scores[word] = count_tfidf(word, texts_dic[text], texts)
+ sorted_words = sorted(scores.items(), key=lambda x: x[1])
+ for word, score in sorted_words[:5]:
+ print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
+def open_text():
+ with open('text.txt', 'r', encoding = 'utf - 8') as f:
+ text = f.read()
+ text = text.lower()
+ arr = text.split()
+ for i, w in enumerate(arr):
+ arr[i] = arr[i].strip(',.?!-')
+ return arr
+
+def first_letter(letter):
+ arr = open_text()
+ for i in arr:
+ if letter == i[0]:
+ print (i)
+ else:
+ pass
+ return i
+letter = input('Введите букву')
+
+
+
+def c():
+ cont = input('Введите страну ')
+ d = {'Россия': 'Москва','Германия' : 'Берлин','Италия':'Рим', 'Франция': 'Париж', 'Азербайджан': 'Баку'}
+ if cont in d:
+ return d[cont]
+ else:
+ return 'NO'
+
def change():
    """Return the built-in country -> capital table inverted.

    Returns:
        A dict mapping each capital to its country.
    """
    d = {'Россия': 'Москва','Германия' : 'Берлин','Италия':'Рим', 'Франция': 'Париж', 'Азербайджан': 'Баку'}
    return {city: country for country, city in d.items()}
+
def delete_doubles():
    """Return the built-in name -> number table without repeated numbers.

    Returns:
        A dict that keeps, for every distinct number, only the first name
        that carries it.
    """
    d = { 'Петя': 12345, 'Пётр': 12345, 'Аня': 54321, 'Анна': 54321, 'Сёма': 13579}
    seen = set()
    d1 = {}
    for key, value in d.items():
        if value not in seen:
            seen.add(value)
            d1[key] = value
    return d1
+print(delete_doubles())
+import re
+def open_text():
+ with open('Жирафики.txt', 'r', encoding = 'utf - 8') as f:
+ text = f.read()
+ text = text.lower()
+ arr = text.split()
+ for i, w in enumerate(arr):
+ arr[i] = arr[i].strip(',.?!-')
+ return arr
+def giraf():
+ s = input('Введите что-нибудь ')
+ regex = 'жираф(а(ми?|х)?|у|е|о[мв]|ами|ы)?'
+ m = re.search(regex,s)
+ if m != None:
+ return 'Я нашёл'
+print(giraf())
def giraf_in_text():
    """Count the words of the text that are forms of "жираф".

    Returns:
        The number of words from ``open_text()`` that match the pattern.
    """
    arr = open_text()
    regex = r'\bжираф(а(ми?|х)?|у|е|о[мв]|ами|ы)?\b'
    # The pattern has to be tested against every word, inside the loop.
    return sum(1 for word in arr if re.search(regex, word) is not None)
+print(giraf_in_text())
+import re
+def open_s():
+ with open ('Динозавры — Википедия.html','r', encoding = 'utf - 8')as f:
+ text = f.read()
+ return text
+def find_dino():
+ text = open_s()
+ reg = r'\b[Дд]инозавр[а-я]{0,5}'
+ m = re.findall(reg, text)
+ return m
+print (find_dino())
+def no_html():
+ text = open_s()
+ m = re.sub(u'<.*?>', u'', text, flags = re.DOTALL)
+ return m
+print (no_html())
def cat_dino():
    """Replace the stem "динозавр" with "кот" in the tag-free text.

    Returns:
        The text from ``no_html()`` with the lower-case and capitalised
        stems substituted at word starts.
    """
    m = no_html()
    # No r'\b' in the replacement: there it would insert a backspace
    # character instead of marking a word boundary.
    n = re.sub(r'\bдинозавр', 'кот', m, flags=re.DOTALL)
    n1 = re.sub(r'\bДинозавр', 'Кот', n, flags=re.DOTALL)
    return n1
+print(cat_dino())
+import re
+def open_text():
+ with open('Гоголь.txt', 'r', encoding = 'utf - 8') as f:
+ text = f.read()
+ text = re.sub(r'\n',' ', text)
+ arr = re.split('\.|\?|\! ', text)
+ for i, s in enumerate(arr):
+ arr[i] = re.sub(r'[:;,.?!— -]',' ', arr[i])
+ return arr
+def words_5():
+ arr = open_text()
+ for i in range(len(arr)):
+ arr1 = arr[i].split()
+ template = '{}_{}'
+ length =[template.format(arr1[i],len(arr1[i])) for i,w in enumerate(arr1)]
+ for i in range(len(length)):
+ print(length[i])
+ return
+words_5()
+import re
+def open_text():
+ with open('Гоголь.txt', 'r', encoding = 'utf - 8') as f:
+ text = f.read()
+ text = re.sub(r'\n',' ', text)
+ arr = re.split('\.|\?|\! ', text)
+ for i, s in enumerate(arr):
+ arr[i] = re.sub(r'[:;,.?!— -]',' ', arr[i])
+ return arr
+def words_5():
+ arr = open_text()
+ for i in range(len(arr)):
+ arr1 = arr[i].split()
+ template = '{}_{}'
+ length ={print(template.format(arr1[i],len(arr1[i]))) for i,w in enumerate(arr1)}
+ return
+words_5()
+def open_text():
+ with open('Austen_Jane.txt', 'r', encoding = 'utf - 8') as f:
+ text = f.read()
+ text = text.lower()
+ arr = text.split()
+ for i, w in enumerate(arr):
+ arr[i] = arr[i].strip(',.”"?!-:;')
+ return arr
+def words():
+ arr = open_text()
+ arr1 = []
+ for i,w in enumerate(arr):
+ if arr[i][-4:] == 'hood':
+ arr1.append(arr[i])
+ return arr1
+def number_of_words():
+ arr1 = words()
+ return len(arr1)
def the_minimum_frequency():
    """Return the least frequent "-hood" word of the text.

    Returns:
        The word from ``words()`` with the lowest number of occurrences;
        on a tie, the one occurring last in the text.  None when there are
        no such words.
    """
    arr1 = words()
    if not arr1:
        return None
    counts = {}
    for word in arr1:
        counts[word] = counts.get(word, 0) + 1
    # Take the real minimum; seeding it with 1 would hide any larger one.
    lowest = min(counts.values())
    for word in reversed(arr1):
        if counts[word] == lowest:
            return word
+def base():
+ arr1 = words()
+ arr2 = []
+ for i,w in enumerate(arr1):
+ x = arr1[i].rfind('h')
+ arr2.append(arr1[i][:x])
+ return arr2
+print(number_of_words())
+print(the_minimum_frequency())
+print(' '.join(map(str,(base()))))
+arr = []
+word = input('Введите слово')
+while word:
+ arr.append (word)
+ word = input( 'Введите слово')
+for w in range(len(arr)):
+ print(arr[w][w+1::])
+word=input("Введите слово: ")
+n=[]
+for i in range(len(word)):
+ n=word[len(word)-i:]
+ n+=word[i:]
+ print(n)
+print ("Введите число")
+num=int(input())
+print ("Введите слово")
+w=input()
+while w!= "программирование" and w!= "программирование":
+ for i in range(num):
+ print (w)
+ print ("Введите число")
+ num=int(input())
+ print ("Введите слово")
+ w=input()
+print ("Конец")
+w = []
+while True:
+ word =(input('Введите латинское слово: '))
+ if len (word) ==0: break
+ elif word[-2:]== 're' or word [-2:]=='ri':
+ w.append(word)
+for i in range (len(w)):
+ print (w[i])
def open_text(name):
    """Read ``<name>.txt`` and return its lower-cased words.

    Args:
        name: File path without the ``.txt`` extension.

    Returns:
        A list of the whitespace-separated words, lower-cased and stripped
        of leading/trailing ``.,!?-``.
    """
    with open(name + '.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    # split() without an argument also separates words at newlines and
    # does not produce empty items for repeated spaces.
    return [word.strip('.,!?-') for word in text.lower().split()]
def edwords(a):
    """Collect the words ending in "ed" and print how many there are.

    Args:
        a: Iterable of word strings.

    Returns:
        A list of the words that end in "ed", in their original order.
    """
    ed = [word for word in a if word.endswith('ed')]
    print ('Количество форм на -ed равно',str(len(ed)))
    return ed
def iedwords(b):
    """Print how many of the given words end in "ied".

    Args:
        b: Iterable of word strings.

    Returns:
        An empty tuple; the count is only printed.
    """
    iedlist = sum(1 for word in b if word.endswith('ied'))
    print ('Количество форм, образованных от глаголов на -у или -е равно',str(iedlist))
    return ()
+def end():
+ name=input('Введите название файла: ')
+ a=open_text(name)
+ b=edwords(a)
+ c=iedwords(b)
+ return (c)
+u=end()
+with open("text.txt", "r", encoding="utf-8") as f:
+ text=f.read()
+ words=text.split(' ')
+ words_num=len(words)
+ letters=list(text)
+ marks_num=int()
+ for i in letters:
+ if i=="." or i==",":
+ marks_num+=1
+ percent=marks_num/words_num*100
+ print('Процент слов, имеющих знак препинания: ', round(percent))
+import random
def bigram():
    """Load the word pairs from ``text.csv``.

    Each line is expected to look like ``first,second``.

    Returns:
        A dict mapping the first field of every line to its second field.
        Lines with fewer than two fields are ignored.
    """
    b = {}
    with open('text.csv', 'r') as f:
        for line in f:
            # Drop the line break so it does not end up inside the value.
            fields = line.rstrip('\n').split(',')
            if len(fields) < 2:
                continue
            b[fields[0]] = fields[1]
    return b
def dots(w):
    """Return one '. ' placeholder for every character of *w*."""
    return '. ' * len(w)
def rand(b):
    """Return a randomly chosen key of the dict *b*.

    Raises:
        IndexError: If *b* is empty.
    """
    return random.choice(list(b))
+print ('Сейчас мы сыграем в игру "Угадай слово"!')
+big=bigram()
+word=rand(big)
+print ("Подсказка:")
+print (big[word]+' '+ dots(big[word]))
+answer=input('Как вы думаете, что это за слово? ')
+if answer==word:
+ print ("Правильно!")
+else:
+ print ('Увы, неправильно!')
+import os
+import re
+def sents():
+ news = 'news'
+ sent = {}
+ for n in os.listdir(news):
+ with open(os.path.join(news, n), encoding='cp1251') as text:
+ text = text.read()
+ sent[n] = len(re.findall('', text))
+
+ return (sent)
+def new (sent):
+
+ with open('new_file', 'w', encoding = 'utf-8') as new:
+ for s in sent:
+ new.write(s+'\t'+str(sent[s])+'\n')
+new(sents())
+import re
+def oh():
+ lines=int()
+ with open ('text.txt', 'r', encoding='utf-8') as first:
+ old_text=first.readlines()
+ for line in old_text:
+ if '' in line:
+ break
+ else:
+ lines+=1
+ with open ('endtext.txt', 'w', encoding='utf-8') as second:
+ lines1=str(lines)
+ second.write(lines1)
+def oops():
+ slov={}
+ with open ('text.txt', 'r', encoding='utf-8') as first:
+ old_text=first.readlines()
+ for line in old_text:
+ if " 2) and (usl[2] == 'ед') and (usl[3] == 'жен'):
+ mass.append(arr[0])
+ sum += float(arr[2])
+ print(', '.join(mass))
+ print('Сумма ipm = ', sum)
+n=input('Введите любое число. ')
+n=int(n)
+while n != 0 :
+ sl=input('Введите любое слово. ')
+ if sl == 'программирование':
+ break
+ print(sl)
+ n=n-1
+word = input("Введите слово: ")
+for k in range(len(word)):
+ newword = (word[-k: ] + word[ :-k])
+ print(newword)
+import re
+import os
+
+def sent_count():
+ path = './news/'
+ for root, dirs, files in os.walk(path):
+ for f in files:
+ with open(os.path.join(root, f), 'r', encoding = 'cp1251') as t:
+ text = t.read()
+ mass = []
+ mass = text.split('\n')
+ s_count = 0
+ for i in mass:
+ if re.search('', i):
+ s_count += 1
+ with open('result.txt', 'a', encoding = 'utf-8') as file:
+ file.write(f +'\t' + str(s_count) + '\n')
+
+def write_csv():
+ with open('result.csv', 'w', encoding = 'utf-8') as file:
+ output = csv.writer(file, delimiter = ',')
+ head = ['Название файла', 'Автор', 'Тематика текста']
+ path = './news/'
+ for root, dirs, files in os.walk(path):
+ for f in files:
+ with open(os.path.join(root, f), 'r', encoding = 'utf-8') as t:
+ text = t.read()
+ if re.search('', text):
+ auth = re.search('', text).group(1)
+def main():
+ sent_count()
+main()
+s = 8
+p = input ("введите число")
+p = int (p)
+while p!=s:
+ if p < s:
+ print ("больше")
+ else:
+ print ("меньше")
+ p = input ("ещё раз")
+ if len (p) == 0:
+ print ("всё")
+ break
+ p = int (p)
+if p==s:
+ print ("вы выиграли")
+print (range (10))
+import os
# Count, for every file in the news directory, the lines that contain a
# sentence boundary ('. ') and append "<file> <count>" to sent.txt.
# Raw string: in a normal literal the "\n" of "\news" is a line break.
# NOTE(review): the path points inside news.zip - the archive presumably has
# to be extracted first; confirm the real location.
direct = r'D:\Downloads\news.zip\news'
files = os.listdir(direct)
for file in files:
    # listdir() returns bare names, so join them with the directory.
    with open(os.path.join(direct, file), 'r') as f:
        sent = sum(1 for line in f if '. ' in line)
    with open('sent.txt', 'a') as f:
        # write() takes a single string, unlike print().
        f.write('{} {}\n'.format(file, sent))
+A = [0] * 7
+for i in range(7):
+ A[i] = int(input("введите число"))
+for i in range(7):
+ B = ["X"] * A[i]
+ if A[i] < 0:
+ print ("введено отрицательное число")
+ else:
+ print (''.join([str(i) for i in B]))
+s = input ("введите число")
+s = int (s)
+for i in range (10):
+ i += 1
+ p = i*s
+ print (i, "*", s, "=", p)
+words = 0
+cap_words = 0
+for line in open('text.txt','r', encoding='utf-8'):
+ word = line.split(' ')
+ for word in open('text.txt','r', encoding='utf-8'):
+ words +=1
+ if word.istitle():
+ cap_words += 1
+s = (cap_words/words)*100
+print ("слов, начинающихся с заглавной буквы", s, "%")
+a = input ("введите a")
+b = input ("введите b")
+c = input ("введите c")
+a = int (a)
+b = int (b)
+c = int (c)
+if c == a % b:
+ print ("a даёт остаток c при делении на b")
+else:
+ print ("a НЕ даёт остаток c при делении на b")
+if c == a/b:
+ print ("a разделить на b равно c")
+else:
+ print ("a разделить на b НЕ равно c")
def linecount(text):
    """Return the number of items (lines) that iterating *text* yields."""
    return sum(1 for _ in text)
+def freq (text):
+ A = dict()
+ for line in text:
+ if " 3):
+ dic[element] += 1
+ elif len(element)> 3:
+ dic[element] = 1
+ return dic
def delete(dic):
    """Remove every entry whose value is 1 from *dic*, in place.

    Args:
        dic: Dict mapping keys to counts; it is modified.

    Returns:
        The same dict object, for convenience.
    """
    # Collect the keys first: a dict must not change size while iterated.
    for word in [key for key, count in dic.items() if count == 1]:
        del dic[word]
    return dic
+corpus_freq = freq_dict(words)
+anek_freq = freq_dict(words_anek)
+izvest_freq = freq_dict(words_izvest)
+teh_freq = freq_dict(words_teh)
+delete (corpus_freq)
+delete(anek_freq)
+delete(izvest_freq)
+delete(teh_freq)
+def pmi_for_cats(x, y):
+ if y == 'anek':
+ dic = anek_freq
+ arr = words_teh + words_izvest
+ num = num_anek
+ elif y == 'teh':
+ dic = teh_freq
+ arr = words_anek + words_izvest
+ num = num_teh
+ elif y == 'izvest':
+ dic = izvest_freq
+ arr = words_teh + words_anek
+ num = num_izvest
+ p_xy = dic[x]/len(arr)
+ p_x, p_y = corpus_freq[x]/len(words), num/(num_izvest + num_teh + num_anek)
+ pmi = log(p_xy/(p_x * p_y))
+ return pmi
+cat_pmi = {}
+i = 0
+for word in corpus_freq:
+ if i > 100:
+ break
+ try:
+ pmi_anek = pmi_for_cats(word, 'anek')
+ except KeyError:
+ pmi_anek = 0
+ try:
+ pmi_teh = pmi_for_cats(word, 'teh')
+ except KeyError:
+ pmi_teh = 0
+ try:
+ pmi_izvest = pmi_for_cats(word, 'izvest')
+ except KeyError:
+ pmi_izvest = 0
+ max_pmi = max(pmi_anek, pmi_teh, pmi_izvest)
+ if max_pmi == 0:
+ continue
+ if max_pmi == pmi_anek:
+ cat = 'anek'
+ elif max_pmi == pmi_teh:
+ cat = 'teh'
+ elif max_pmi == pmi_izvest:
+ cat = 'izvest'
+ print(word, cat)
+ i += 1
+word = input ("Введите слово: ")
+a = 0
+b = len (word)
+while word [a:b] != "":
+ print (word [a:b])
+ a += 1
+ b -=1
+A = []
+i = 0
+print ('Enter 8 words')
+while i < 9:
+ list.append(A, input())
+ i+=1
+i = 1
+while i < 9:
+ print (A[i-1]+A[i])
+ i+=2
+a = int (input ("Введите число a: "))
+b = int (input ("Введите число b: "))
+c = int (input ("Введите число c: "))
+if (a/b == c):
+ print ("a / b = c")
+else: print ("a / b != c")
+if (a ** b == c):
+ print ("a ^ b = c")
+else: print ("a ^ b != c")
+def questions():
+ file = input('Введите название файла на английском: ')
+ leng = int(input('Введите длину слова: '))
+ quant = open_file(file)
+ output = perc(quant, leng)
+ return output
def open_file(file):
    """Read a text file and return its whitespace-separated words.

    Args:
        file: Path of the file to read.

    Returns:
        A list of word strings.
    """
    # The context manager closes the handle, which was left open before.
    with open(file, 'r') as f:
        return f.read().split()
def perc(quant, leng):
    """Return the share of "un-" words that are longer than *leng*.

    Also prints the number of words starting with "un" when there are any.

    Args:
        quant: Iterable of word strings.
        leng: Length a word has to exceed to be counted.

    Returns:
        The rounded percentage of "un-" words longer than *leng*, or a
        message string when no word starts with "un".
    """
    un_words = [item for item in quant if item.startswith('un')]
    if not un_words:
        return 'В тексте нет слов, начинающихся на un-'
    i = len(un_words)
    j = sum(1 for item in un_words if len(item) > leng)
    print ('Количество слов, начинающихся с un-, в тексте: ', i)
    return round(j / i * 100)
+print('Проценты: ', questions())
+import random
def read():
    """Return the lines of ``text.txt``, each with its trailing newline."""
    # The context manager closes the handle, which was left open before.
    with open('text.txt', 'r') as f:
        return f.readlines()
def array(numb):
    """Return the words on line *numb* (0-based) of ``text.txt``.

    Raises:
        IndexError: If the file has no line with that index.
    """
    return read()[numb].split()
+def noun2 ():
+ return random.choice(array(0))
+def noun3 ():
+ return random.choice(array(1))
+def noun4 ():
+ return random.choice(array(2))
+def imper2 ():
+ return random.choice (array(3))
+def imper3 ():
+ return random.choice(array(4))
+def imper4 ():
+ return random.choice(array(5))
+def verb2 ():
+ return random.choice(array(6))
+def verb3 ():
+ return random.choice(array(7))
+def verb4 ():
+ return random.choice(array(8))
+def adverb1 ():
+ return random.choice (array(9))
+def adverb2 ():
+ return random.choice (array(10))
+def adverb3 ():
+ return random.choice (array(11))
+def adverb4 ():
+ return random.choice (array(12))
+def punct():
+ marks = [".", "?", "!", "..."]
+ return random.choice(marks)
# Verse templates: every function fills a fixed pattern with random words
# (drawn left to right) and appends a random punctuation mark.
def verse_5_1():
    """imper3 noun2 + mark."""
    return f"{imper3()} {noun2()}{punct()}"


def verse_5_2():
    """imper2 noun3 + mark."""
    return f"{imper2()} {noun3()}{punct()}"


def verse_5_3():
    """verb2 noun3 + mark."""
    return f"{verb2()} {noun3()}{punct()}"


def verse_5_4():
    """verb3 noun2 + mark."""
    return f"{verb3()} {noun2()}{punct()}"


def verse_5_5():
    """adverb1 verb2 noun2 + mark."""
    return f"{adverb1()} {verb2()} {noun2()}{punct()}"


def verse_5_6():
    """adverb1 imper4 + mark."""
    return f"{adverb1()} {imper4()}{punct()}"


def verse_5_7():
    """adverb2 imper3 + mark."""
    return f"{adverb2()} {imper3()}{punct()}"


def verse_5_8():
    """adverb3 imper2 + mark."""
    return f"{adverb3()} {imper2()}{punct()}"


def verse_7_1():
    """imper3 noun4 + mark."""
    return f"{imper3()} {noun4()}{punct()}"


def verse_7_2():
    """imper4 noun3 + mark."""
    return f"{imper4()} {noun3()}{punct()}"


def verse_7_3():
    """verb3 noun4 + mark."""
    return f"{verb3()} {noun4()}{punct()}"


def verse_7_4():
    """verb4 noun3 + mark."""
    return f"{verb4()} {noun3()}{punct()}"


def verse_7_5():
    """adverb1 verb3 noun3 + mark."""
    return f"{adverb1()} {verb3()} {noun3()}{punct()}"


def verse_7_6():
    """adverb1 verb4 noun2 + mark."""
    return f"{adverb1()} {verb4()} {noun2()}{punct()}"


def verse_7_7():
    """adverb1 verb2 noun4 + mark."""
    return f"{adverb1()} {verb2()} {noun4()}{punct()}"


def verse_7_8():
    """adverb2 verb2 noun3 + mark."""
    return f"{adverb2()} {verb2()} {noun3()}{punct()}"


def verse_7_9():
    """adverb2 verb3 noun2 + mark."""
    return f"{adverb2()} {verb3()} {noun2()}{punct()}"
def make_verse_5():
    """Return the output of one randomly chosen ``verse_5_*`` template."""
    builders = (verse_5_1, verse_5_2, verse_5_3, verse_5_4,
                verse_5_5, verse_5_6, verse_5_7, verse_5_8)
    verse = random.choice([1, 2, 3, 4, 5, 6, 7, 8])
    return builders[verse - 1]()
def make_verse_7():
    """Return the output of one randomly chosen ``verse_7_*`` template."""
    builders = (verse_7_1, verse_7_2, verse_7_3, verse_7_4, verse_7_5,
                verse_7_6, verse_7_7, verse_7_8, verse_7_9)
    verse = random.choice([1, 2, 3, 4, 5, 6, 7, 8, 9])
    return builders[verse - 1]()
+print(make_verse_5())
+print(make_verse_7())
+print(make_verse_5())
+print(make_verse_7())
+print(make_verse_7())
# Print every power of two (starting with 2**1) that is strictly below the
# number typed by the user.
num = int(input("Введите натуральное чиcло: "))
i = 1
power = 2
while power < num:
    print(power)
    i += 1
    power = 2 ** i
+
+
+import re
def read_file():
    """Return the whole content of ``corp.txt`` (UTF-8) as one string."""
    # The ``with`` block already closes the file; the explicit close() was redundant.
    with open('corp.txt', 'r', encoding='UTF-8') as file:
        return file.read()
def counter(marker=''):
    """Return 1 + the number of lines of ``corp.txt`` read before *marker* appears.

    Lines are counted until the first one containing *marker*.
    NOTE(review): the default marker is the empty string found in the
    source, which matches the very first line (result 1); the original
    end-of-header marker looks lost — pass it explicitly once confirmed.
    """
    i = 1
    with open('corp.txt', 'r', encoding='UTF-8') as file:
        for line in file:
            if marker in line:
                break
            i += 1
    return i
def five_points():
    """Write the header line count reported by ``counter()`` to 'подсчет строк.txt'."""
    with open('подсчет строк.txt', 'w', encoding='utf-8') as new_file:
        new_file.write('Число строк заголовка: ' + str(counter()))
def dictionary():
    """Return a frequency dict of the ``>word`` tokens found in ``read_file()``'s text."""
    d = {}
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    for lemma in re.findall(r'>\w+', read_file()):
        word = lemma.strip('>')
        d[word] = d.get(word, 0) + 1
    return d
def eight_points():
    """Dump ``dictionary()`` to 'словарик.txt', one 'word - count ' entry per line."""
    d = dictionary()
    with open('словарик.txt', 'w', encoding='utf-8') as dic_file:
        for key, count in d.items():
            dic_file.write(key + ' - ' + str(count) + ' \n')
def ten_points():
    """Return the first ``type="f?h…"`` attribute value of every line of ``corp.txt`` that has one."""
    formlist = []
    # The file used to stay open after the function returned.
    with open('corp.txt', 'r', encoding='UTF-8') as file:
        for line in file:
            pronom = re.search('type="(f.h.+?)"', line)
            if pronom is not None:
                formlist.append(pronom.group(1))
    return formlist
# Run the three tasks of the corpus script.
five_points()
eight_points()
print ('Загляните в папку с программой и попробуйте найти в ней новые txt-файл.')
print (ten_points())
# The import below was fused onto the previous statement (a syntax error).
import os
+
+
+
+
def lists_creator(path='/home/lera/Рабочий стол/Загрузки'):
    """Print two lists of file names found anywhere under *path*.

    The first list holds names with more than three letters "a" (Latin or
    Cyrillic, either case); the second holds names with more than one
    punctuation mark.  *path* defaults to the directory that used to be
    hard-coded.
    """
    a_letters = 'aAАа'
    punct_marks = '.,?!()-'
    aaa = []
    punct = []
    for _root, _dirs, files in os.walk(path):
        for file in files:
            q_a = sum(ch in a_letters for ch in file)
            q_punct = sum(ch in punct_marks for ch in file)
            if q_a > 3:
                aaa.append(file)
            # One mark is discounted — presumably the extension dot.
            if q_punct - 1 > 0:
                punct.append(file)
    print ('+++++++++++++Файлы, в которых большк 3х "а":+++++++++++++')
    for el in aaa:
        print (el)
    print ('+++++++++++++Файлы со знаками препинания в названии:+++++++++++++')
    for el in punct:
        print(el)
+
+
+
+
+
+
+
+
+
+
def kracuvo(path='/home/lera/Рабочий стол/Загрузки'):
    """Print every directory under *path* followed by the names it contains.

    Sub-directories are listed relative to the directory the walk is
    currently in; building the path from the top-level folder failed for
    anything nested deeper than one level.
    """
    for roots, dirs, files in os.walk(path):
        for dir_name in dirs:
            print('--', dir_name)
            for file in os.listdir(os.path.join(roots, dir_name)):
                print (' ', file)
+
+
print(os.path.join('дз ап', 'morozova3.docx'))
# The import below was fused onto the print call (a syntax error).
import os
+import re
def s_counter_5():
    """Write '<file>\\t<count>' to ``res.txt`` for every file under ``news``.

    The count is the number of '.', '?' and '!' characters in the file.
    Returns the (already closed) result file object, as before.
    """
    # Open the result file once: reopening it for every directory truncated
    # the earlier output and leaked handles.
    with open('res.txt', 'w', encoding='utf-8') as s_result:
        for root, _dirs, files in os.walk('news'):
            for file in files:
                # Join with the walk root so files in sub-folders resolve too.
                with open(os.path.join(root, file), 'r') as f:
                    file_text = f.read()
                q = sum(file_text.count(mark) for mark in '.?!')
                s_result.write(file + '\t' + str(q) + '\n')
    return s_result
def table_8():
    """Write one author line per file under ``news`` to ``table.csv``.

    Each line of a file is searched with the author pattern; the last line
    decides (as before).  'no author' is written when nothing usable is found.
    NOTE(review): the line-selecting pattern '.+' looks truncated (it now
    matches every line) — confirm against the original script.
    """
    with open('table.csv', 'w', encoding='utf-8') as table:
        for root, _dirs, files in os.walk('news'):
            for file in files:
                with open(os.path.join(root, file), 'r') as f:
                    file_text = f.read()
                # Default covers empty files (used to be a NameError / stale value).
                author = 'no author'
                for el in re.findall('.+', file_text):
                    a = re.search(r'>([a-яА-Я]+.[a-яА-Я]+)?\.', el)
                    # The group is optional, so a match may still carry no name.
                    if a is not None and a.group(1) is not None:
                        author = a.group(1)
                    else:
                        author = 'no author'
                table.write(author + '\n')
+
+
+
+
# The bare name only referenced the function; the parentheses make it run.
s_counter_5()
table_8()
+
+
+import re
def file_name():
    """Show the instructions and return the file name typed by the user."""
    instructions = 'Поместите файл в одну папку с данной программой.\nВведите имя файла, чтобы получить список словоформ:'
    print (instructions)
    return input()
def read_file():
    """Return the lower-cased, punctuation-stripped words of the file named by ``file_name()``."""
    wordlist = []
    # ``with`` closes the file even when reading fails half-way.
    with open(file_name(), 'r', encoding='UTF-8') as file:
        for line in file:
            for word in line.split():
                wordlist.append(word.lower().strip('.,:;"«»-?()!'))
    return wordlist
def form_finder():
    """Return the words from ``read_file()`` that contain a form matched by the verb pattern."""
    pattern = re.compile('(не(до)?|под)?вып[еиь]([йтлеюи]|(вш))[мшьаоиыуе]?(го|м(у|и)?[ейяюх])?(ся)?')
    return [word for word in read_file() if pattern.search(word) is not None]
def list_without_repetitions():
    """Return ``form_finder()``'s words with duplicates removed, first occurrences kept in order."""
    # The old nested loop removed items from the list it was iterating over
    # (and shadowed the builtin ``list``), which skipped and lost elements.
    return list(dict.fromkeys(form_finder()))
# Print every distinct form found.
for el in list_without_repetitions():
    print(el)
# The prompt below was fused onto the line above (a syntax error).
print ('Введите число')
# Echo up to n words, stopping early at the word 'программирование'.
n = int(input())
for i in range(n):
    print('Введите слово')
    a = input()
    print ('Ваше слово:', a)
    if a == 'программирование':
        break
print ('Цикл завершен')
# Print every word of wordlist.txt lower-cased with edge punctuation removed.
# (The open() call was fused onto the print above and the file never closed.)
with open('wordlist.txt', 'r', encoding='utf-8') as f:
    for line in f:
        arr = [word.strip('.,?!;:-"') for word in line.split()]
        for el in arr:
            el = el.lower()
            print (el)
+
+
+
+
+import random
def open_file():
    """Return the lines of ``wordlist.txt`` (UTF-8, newlines kept)."""
    with open('wordlist.txt', 'r', encoding='UTF-8') as file:
        return file.readlines()
def random_word(lines):
    """Return a random element of *lines* with surrounding newlines removed."""
    return random.choice(lines).strip('\n')
def syllable_counter(word):
    """Return how many characters of *word* are lower-case vowels (accented ones included)."""
    vowels = 'eyuioaéèêàâùûôî'
    return sum(1 for letter in word if letter in vowels)
def line_creator(syl_number):
    """Print a random line whose words add up to exactly *syl_number* vowels.

    Words are drawn until the budget is hit exactly; overshooting discards
    the line and starts over.  The first letter is capitalised and a random
    '!', '.' or '?' is appended.
    """
    lines = open_file()  # read the word list once instead of on every draw
    syl_max = syl_number
    chosen = []
    while syl_max >= 0:
        word = random_word(lines)
        syl_max -= syllable_counter(word)
        if syl_max < 0:
            # Overshot: throw the line away and retry from scratch.
            chosen = []
            syl_max = syl_number
            continue
        chosen.append(word)
        if syl_max == 0:
            break
    text = ' '.join(chosen)
    # Slicing instead of indexing avoids an IndexError on an empty line.
    phrase = text[:1].upper() + text[1:] + random.choice(['!', '.', '?'])
    print (phrase)
def main():
    """Print a heading followed by three generated lines (budgets 5, 7, 5)."""
    print('\nThere you can see one more perfect creation:\n')
    for budget in (5, 7, 5):
        line_creator(budget)
+if __name__ == '__main__':
+ main()
+
+
+
+
+import os
+import re
# Count files whose base name is exactly five Latin letters and list the
# distinct names.
# NOTE(review): `files`, `i` and `name_base` were used without being defined
# in the visible script; the listing of the current directory is an
# assumption — confirm the intended source of file names.
files = os.listdir('.')
i = 0
name_base = []
for item in files:
    stem = item.split('.')[0]
    if len(stem) == 5 and re.search('[A-Za-z]{5}', stem) is not None:
        i += 1
        if stem not in name_base:
            name_base.append(stem)
print ('Число файлов с названием из пяти латинских символов: ',i)
print ('\nСписок названий найденных файлов (без повторов):')
for el in name_base:
    print (el)
+
+
+import os
# Report the letter that most often starts a directory name below the
# current directory.
dirlist = [el for root, dirs, files in os.walk('.') for el in dirs]
stat = {}
letters = 'qwertyuiopasdfghjklzxcvbnmйцукенгшщзхъфывапролджэячсмитьбю'
# Filter while building the list: removing items from a list during
# iteration skipped elements and still counted non-letters.
letter = [name[0].lower() for name in dirlist if name[0].lower() in letters]
for el in letter:
    stat[el] = stat.get(el, 0) + 1
i = 0
res = 0
for value in stat:
    if stat[value] > i:
        i = stat[value]
        res = value
if i==0:
    print ('Названий, начинающихся с букв, похоже, тут нет :(')
else:
    print('Чаще всего названия папок начинаются с буквы:', res, '\nТакие названия встречаются', i, 'раз(a)')
# Print every rotation of the typed word.
word = input('Введите слово: ')
if word:
    # The old in-loop `break` could never trigger (i never exceeds len - 1).
    for i in range(len(word)):
        print (word[i:]+word[:i])
else:
    print ('Нет входных данных')


# The definition below was fused onto the print call above (a syntax error).
def read_words():
    """Return all whitespace-separated tokens of ``austen.txt`` (UTF-8)."""
    wordlist = []
    with open('austen.txt', 'r', encoding='UTF-8') as file:
        for line in file:
            wordlist.extend(line.split())
    return wordlist
def counter(part):
    """Return how many words from ``read_words()`` end with *part*."""
    # str.endswith replaces the manual slice, which misbehaved for part == ''.
    return sum(1 for word in read_words() if word.endswith(part))
print ('Число форм в данном тексте, оканчивающихся на -ed: ',counter('ed'))
print ('Из них - правильные глаголы в прошедшем времени на -y:',counter('ied'))
# The import below was fused onto the print call above (a syntax error).
import re
def file_name():
    """Show the instructions and return the file name typed by the user."""
    # The prompt contained a mis-encoded character; the first letter of
    # "Введите" is restored.
    print ('Поместите файл в одну папку с данной программой.\nВведите имя файла, чтобы получить список cфер деятельности данного ученого:')
    name = input()
    return name
def reader():
    """Return the lines of the file named by ``file_name()`` without newline characters."""
    # ``with`` closes the file on errors too; the local no longer shadows ``list``.
    with open(file_name(), 'r', encoding='UTF-8') as file:
        return [line.strip('\n') for line in file]
def str_sphere():
    """Return the line two below the first 'Научная сфера:' line, or '' when unavailable."""
    infobox = reader()
    for q, line in enumerate(infobox):
        if 'Научная сфера:' in line:
            # A label within the last two lines used to raise IndexError.
            return infobox[q + 2] if q + 2 < len(infobox) else ''
    return ''
# main: starts by collecting every '>'-prefixed run of lower-case Cyrillic
# letters, spaces and hyphens from str_sphere()'s result.
# NOTE(review): from the strip(...) call onward the body references names that
# are never defined here (i, form1, num_ana, num_w) and passes two arguments
# to str.strip; it looks like the tail of a different function was spliced in
# — restore from the original script before relying on this block.
+def main():
+ form=re.findall('>[а-я -]+', str_sphere())
+ list=''
+ for el in form:
+ el=el.strip('>.+',i)
+ form2=re.findall('ana',i)
+ for el in form2:
+ num_ana+=1
+ if form1!=None:
+ num_w+=1
+ koef=num_ana/num_w
+ return koef
+
+
+
# freq_dict_8: gathers the gr="..." attribute value of every item yielded by
# reader() and splits each value on commas.
# NOTE(review): the `keys=` line is cut off inside a string literal and runs
# straight into unrelated code (nlist, 'X' * ...); the rest of the function
# cannot be recovered from this view — restore from the original script.
+def freq_dict_8():
+ d={}
+ list=[]
+ new_list=[]
+ for i in reader():
+ form=re.search('gr="(.+)"',i)
+ if form!=None:
+ list.append(form.group(1))
+ for el in list:
+ i = el.split(',')
+ new_list.append(i)
+ keys=[item[0].strip('=qwertyuiopasdfghjklzxcvbnm/<>" ') for item in new_list if item!='NUM=nom" /> 0 :
+ print('X'*nlist[i])
+ else:
+ print('')
+ i += 1
+
+
+
+
+
+
+
+
+
+
+
+import re
def openfile_lines(fname):
    """Return the lines (newlines kept) of the UTF-8 text file *fname*."""
    with open(fname, 'r', encoding = 'utf-8') as handle:
        return handle.readlines()
# find_words: meant to pull (lemma, type, form) triples out of markup lines
# using the lemma="..." / type="..." / >...< pattern.
# NOTE(review): as it stands the loop indexes the still-empty `words` list
# instead of `lines`, and appends to / returns `pure`, which is never defined
# here.  This looks like two functions (find_words and
# purify_info_about_words, which main() calls) merged by text loss — restore
# from the original script.
+def find_words(lines):
+ words = []
+ for i in range(len(lines)):
+ if re.search('(.+?)<', words[i]):
+ found_lemma = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(1)
+ found_type = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(2)
+ found_form = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(3)
+ pure.append([found_lemma, found_type, found_form])
+ return pure
def count_forms(words):
    """Return a dict mapping each ``type="..."`` value found in *words* to its frequency.

    Entries without a ``type`` attribute are skipped (they used to raise
    AttributeError).
    """
    freq = {}
    for entry in words:
        found = re.search('type="(.+?)"', entry)
        if found is None:
            continue
        form = found.group(1)
        freq[form] = freq.get(form, 0) + 1
    return freq
def plural_adjectives(freqs):
    """Return the sub-dict of *freqs* whose keys contain the pattern ``l.f``.

    The result is keyed by the full form.  Previously the matched substring
    was used as the key, which raised KeyError whenever the match did not
    start at the beginning of the form.
    """
    return {form: count for form, count in freqs.items()
            if re.search('l.f.*', form)}
def main():
    """Analyse ``dict.txt`` and write the four report files.

    Writes the line count, the list of forms, the frequencies returned by
    ``plural_adjectives`` and a CSV of the rows from
    ``purify_info_about_words``.
    """
    lines_dict = openfile_lines('dict.txt')
    word_list = find_words(lines_dict)
    pure_info = purify_info_about_words(word_list)
    freq_dict = count_forms(word_list)
    pluradj_freq_dict = plural_adjectives(freq_dict)
    with open('lines.txt', 'w', encoding = 'utf-8') as out:
        out.write(str(len(lines_dict)))
    with open('word forms.txt', 'w', encoding = 'utf-8') as out:
        out.write('\n'.join(freq_dict.keys()))
    with open('plural adjectives frequencies.txt', 'w', encoding = 'utf-8') as out:
        out.write(''.join(str(key) + ' ' + str(count) + '\n'
                          for key, count in pluradj_freq_dict.items()))
    with open('dictionary.csv', 'w', encoding='utf-8') as out:
        out.write(','.join(['лемма', 'грамматическая форма', 'словоформа']) + '\n')
        for row in pure_info:
            out.write(','.join(row) + '\n')
+if __name__ == '__main__':
+ main()
+
+
+
+
+
+
+
+import csv
def main():
    """Run the clue quiz read from ``clues.csv`` (columns: clue, answer).

    Every clue allows len(clue) + 1 attempts.  After a wrong answer the
    number of attempts that are really left is shown; once they run out the
    answer is revealed and the quiz moves on.
    """
    clues = {}
    with open('clues.csv', 'r', encoding='utf-8') as f:
        for row in csv.reader(f, delimiter=','):
            clues[row[0]] = row[1]
    for key, answer in clues.items():
        attempts = len(key) + 1
        for used in range(1, attempts + 1):
            response = input(key + '...')
            if response == answer:
                print('Правильно!')
                break
            if used < attempts:
                # The old message overstated the remaining attempts by one.
                print('Неправильно. У тебя ещё ' + str(attempts - used) + ' попыток.')
            else:
                # The old loop kept running here with the next clue's index.
                print('У тебя закончились попытки. Правильный ответ: ' + key + ' ' + answer)
+if __name__ == '__main__':
+ main()
+
+
+
+
+
+
+
+
+
+
# Echo up to n typed words; the word 'программирование' ends the loop
# without being echoed.
n = int(input('Введите целое положительное число.'))
index = 0
for index in range(1, n + 1):
    word = input('Введите слово.')
    if word == 'программирование':
        break
    print(word)
+
+
+
+
+
+
# Print the letters of the word from last to first, one per line, leaving
# out 'з' and 'я'.
word = input('Введите слово в русской раскладке.')
index = len(word)
for letter in reversed(word):
    if letter not in ('з', 'я'):
        print(letter)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import os
+import re
+import csv
def open_file_texts(directory):
    """Return ``{file name: text}`` for every file under *directory* (read as windows-1251).

    Files with the same name in different folders overwrite each other.
    """
    collected = {}
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            with open(os.path.join(folder, name), 'r', encoding='windows-1251') as handle:
                collected[name] = handle.read()
    return collected
def get_sentences(text):
    """Return all matches of the sentence pattern in *text*."""
    # NOTE(review): the pattern looks truncated (sentence delimiters seem to
    # be missing), so each match is a single character — confirm against the
    # original script.
    sentence_pattern = re.compile('(.|\n)+?')
    return sentence_pattern.findall(text)
def write_out_count_sentences(file_texts_dict):
    """Write '<file name>\\t<number of sentences>' lines to 'amount of sentences.txt'."""
    with open('amount of sentences.txt', 'w', encoding='utf-8') as report:
        for filename, text in file_texts_dict.items():
            report.write(filename + '\t' + str(len(get_sentences(text))) + '\n')
# get_words: builds [word, trailing punctuation, analysis] triples from the
# (text, punctuation) pairs found in raw_text.
# NOTE(review): several string arguments are empty ('' passed to strip and
# split); str.split('') raises ValueError, so markup delimiters appear to
# have been lost from these literals — restore them from the original script.
+def get_words(raw_text):
+
+ word_list = []
+ raw_lines = raw_text.split()
+ word_lines = re.findall('(.+?)((?:\n?[«»,.! \?\-])*)', raw_text)
+ for i in range(len(word_lines)):
+ line = word_lines[i][0].strip('').strip('')
+ ana, word = line.split('')
+ ana = ana.strip('>').strip().strip('ana').strip()
+ word_list.append([word] + [word_lines[i][1].strip().strip(' ')] + [ana])
+ return word_list
def create_clear_text_out_of_words(word_list):
    """Return display chunks (word + following punctuation) for *word_list*.

    Entries are ``[word, punctuation, analysis]`` as built by this script's
    ``get_words``, so the punctuation is read from index 1.  Index 2, used
    before, holds the analysis string.
    """
    text = []
    for word in word_list:
        follow = word[1]
        digits = re.match(r'\d+', follow)
        if '«' in follow:
            text.append(word[0] + ' «')
        elif digits:
            text.append(word[0] + ' ' + digits.group(0) + ' ')
        else:
            text.append(word[0] + follow + ' ')
    return text
# find_file_meta: builds one [filename, author, topic] row per entry of
# file_texts_dict.
# NOTE(review): both search patterns are empty strings; an empty pattern
# always matches and has no group 1, so .group(1) raises IndexError.  The
# original author/topic patterns appear to have been lost — restore them
# from the original script.
+def find_file_meta (file_texts_dict):
+ file_meta_list = []
+ for filename in file_texts_dict:
+ text = file_texts_dict[filename]
+ author = re.search('', text)
+ if author:
+ author = re.search('', text).group(1)
+ topic = re.search('', text)
+ if topic:
+ topic = re.search('', text).group(1)
+ file_meta_list.append([filename, author, topic])
+ return file_meta_list
def write_out_file_meta(file_meta_list):
    """Write *file_meta_list* rows to 'file metadata.csv' (';'-separated) below a header row."""
    # newline='' is what the csv module expects; without it some platforms
    # emit an extra blank line after every row.
    with open('file metadata.csv', 'w', encoding='utf-8', newline='') as n:
        text = csv.writer(n, delimiter=';')
        text.writerow(['Название файла', 'Автор', 'Тематика текста'])
        text.writerows(file_meta_list)
def find_spec_bigr_in_sentence(word_list):
    """Return 'prev word' strings for neighbours tagged 'PR' then 'loc'.

    A pair qualifies when the first entry's index-2 field contains 'PR' and
    the second entry's contains 'loc'.
    """
    return [previous[0] + ' ' + current[0]
            for previous, current in zip(word_list, word_list[1:])
            if 'loc' in current[2] and 'PR' in previous[2]]
def find_all_spec_bigr(raw_texts_dict):
    """Return ``[bigram, context]`` pairs for every special bigram in all texts.

    The context is the output of ``create_clear_text_out_of_words`` for the
    sentence the bigram was found in.
    """
    found = []
    for text in raw_texts_dict.values():
        for sentence in get_sentences(text):
            words = get_words(sentence)
            bigrams = find_spec_bigr_in_sentence(words)
            context = create_clear_text_out_of_words(words)
            found.extend([bigr, context] for bigr in bigrams)
    return found
def write_out_spec_bigr(spec_bigr):
    """Write '<bigram>\\t<context>' lines to ``bigrams.txt``.

    The context may be a string or a list of text chunks (which is what
    ``find_all_spec_bigr`` produces); a list is joined first, because
    str + list raised TypeError.
    """
    with open('bigrams.txt', 'w', encoding='utf-8') as f:
        for bigr in spec_bigr:
            context = bigr[1] if isinstance(bigr[1], str) else ''.join(bigr[1])
            f.write(bigr[0] + '\t' + context + '\n')
def main():
    """Read the ``news`` corpus and write the sentence counts, metadata table and bigram list."""
    corpus = open_file_texts('news')
    write_out_count_sentences(corpus)
    write_out_file_meta(find_file_meta(corpus))
    write_out_spec_bigr(find_all_spec_bigr(corpus))
+if __name__ == '__main__':
+ main()
+
# Print every entry of words.txt that contains ' союз '.
with open('words.txt','r', encoding = 'utf-8') as f:
    text = f.read()
words = text.split('\n')
for entry in words:
    if ' союз ' in entry:
        print(entry)
+
# List the entries of words.txt that contain both 'сущ' and 'жен', then
# print the sum of their ipm values.
with open('words.txt','r', encoding = 'utf-8') as f:
    text = f.read()
words = text.split('\n')
feminin = []
ipm = 0
for entry in words:
    # Only well-formed 'word|grammar|ipm' entries can be unpacked; this is
    # the same guard the lookup script for this file uses.
    if 'сущ' in entry and 'жен' in entry and entry.count('|') == 2:
        word, gram, ipmi = entry.split('|')
        feminin.append(entry)
        ipm += float(ipmi)
for entry in feminin:
    print(entry+',')
print(ipm)
+
+
# Look up user-typed words in words.txt ('word|grammar|ipm' entries) and
# print the grammar and ipm of every matching entry.
with open('words.txt','r', encoding = 'utf-8') as f:
    text = f.read()
words = text.split('\n')
words1 = []
while True:
    word = input('Print any russian word. ')
    if not word:
        break
    words1.append(word)
for query in words1:
    check = 0
    for entry in words:
        if entry.count('|') != 2:
            continue
        word, gram, ipmi = entry.split('|')
        if query == word.strip(' '):
            print('grammar:', gram.strip(' ')+',' , 'ipm =', float(ipmi))
            check = 1
    if check == 0:
        print('This word was not find in the dictionary.')
+
+
+
+
+
+
+
+
+
+
+
+
# Print every rotation of the typed text, one per line.
text = input('Type something: ')
for shift, _char in enumerate(text):
    print(text[shift:] + text[:shift])
+
+
+
+
+
+import re
def match_verb_forms(line):
    """Return a match object for a form of 'программировать' at the start of *line*, else None.

    Matching is case-insensitive.  Patterns are tried from most to least
    specific; the present participle is tried before the present tense,
    which used to swallow its stem ("программируем") and made the participle
    branch unreachable.
    """
    adj_ending = r'(ая|о(е|й|му?|го)|ы(й|е|ми?|х))'
    patterns = (
        r'буд(е(шь|те?|м)|ут?) программировать',          # analytic future
        r'программировать(ся)?',                          # infinitive
        r'программируем' + adj_ending,                    # present participle
        r'программиру(ю|(е(те?|м|шь)))',                  # present tense
        r'программировал(а|и)?',                          # past tense
        r'программированн' + adj_ending,                  # past participle
        r'программируя',                                  # gerund
        r'будучи программированн' + adj_ending,
        r'будучи программируем' + adj_ending,
    )
    for pattern in patterns:
        match = re.match(pattern, line, re.I)
        if match:
            return match
    return None
def open_forms(fname):
    """Return the lower-cased words of UTF-8 file *fname* with edge punctuation stripped."""
    with open (fname, 'r', encoding = 'utf-8') as f:
        lowered = f.read().lower()
    return [token.strip('.,?*()«»') for token in lowered.split()]
def main():
    """Print the distinct verb forms found in ``test.txt``.

    Each word is checked together with its successor, so two-word forms
    such as the analytic future are caught.  The last word is checked on
    its own; it used to be skipped, and the single-word branch was
    unreachable.
    """
    matches = []
    forms = open_forms('test.txt')
    for i, form in enumerate(forms):
        candidate = form + ' ' + forms[i + 1] if i + 1 < len(forms) else form
        found = match_verb_forms(candidate)  # evaluate the patterns once
        if found and found.group() not in matches:
            matches.append(found.group())
    print(*matches)
+if __name__ == '__main__':
+ main()
+
+
+
+import re
+import os
+import shutil
# Count the directories in the current folder whose name (up to the first
# dot) consists only of Cyrillic letters, and collect the distinct names.
flist = os.listdir(os.getcwd())
clist = []
cfcount = 0
for n in flist:
    name = n.split('.')[0]
    cyrillic = 1 if all(re.match('[А-Яа-яЁё]', let) for let in name) else 0
    if cyrillic == 1 and os.path.isdir(n):
        cfcount += 1
        if name not in clist:
            clist.append(name)
print(cfcount)
print(clist)
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
+import csv
def open_file(name):
    """Return the full text of the UTF-8 file *name*."""
    with open(name, 'r', encoding='utf-8') as handle:
        return handle.read()
# get_words: builds rows of the form [word, number of analyses, trailing
# punctuation, analysis...] from the matches found in raw_text.
# NOTE(review): the `line = ...split(' 0:` line is cut off inside a string
# literal and the loop over `e` that follows has lost its header; the markup
# delimiters appear to have been dropped — restore from the original script.
+def get_words(raw_text):
+ word_arr = []
+ raw_lines = raw_text.split()
+ word_lines = re.findall('(.+)((?:\n?[«»,.! \?\-])*(?:\n?[01234567])*)', raw_text)
+ for i in range(len(word_lines)):
+ line = word_lines[i][0].strip('').strip('').split(' 0:
+ line[e] = line[e].strip(' />')
+ word_arr.append([line[0]] + [len(line)-1] + [word_lines[i][1].strip().strip(' ')] + line[1:])
+ return word_arr
def count_average_anas(word_arr):
    """Return the mean of the index-1 values of *word_arr* (0 for an empty list)."""
    if not word_arr:
        return 0
    return sum(entry[1] for entry in word_arr) / len(word_arr)
def count_all_pos(word_arr):
    """Count the ``gr="POS`` tags in every analysis field and write them to a file.

    Analysis fields are the elements from index 3 on.  Fields without a
    ``gr="`` attribute are skipped (they used to raise AttributeError).
    The counts go to 'parts of speech frequency.txt' as '<pos>\\t<count>'
    lines and are also returned as a dict.
    """
    pos_dict = {}
    for entry in word_arr:
        for analysis in entry[3:]:
            found = re.search(r'gr="(\w+)', analysis)
            if found is None:
                continue
            pos = found.group(1)
            pos_dict[pos] = pos_dict.get(pos, 0) + 1
    with open('parts of speech frequency.txt', 'w', encoding='utf-8') as f:
        for pos, count in pos_dict.items():
            f.write(pos + '\t' + str(count) + '\n')
    return pos_dict
def make_text(word_arr):
    """Return display chunks (word + following punctuation) for *word_arr*.

    The punctuation is read from index 2 of every entry.
    """
    chunks = []
    for word in word_arr:
        follow = word[2]
        d = re.match('\d+', follow)
        if '«' in follow:
            chunk = word[0] + ' «'
        elif d:
            chunk = word[0] + ' ' + d.group(0) + ' '
        else:
            chunk = word[0] + follow + ' '
        chunks.append(chunk)
    return chunks
def find_all_instr(word_arr, text):
    """Collect the positions of words whose fields mention 'ins' and write their contexts.

    Returns ``{word: [positions]}``.  For every position one
    '<3 chunks before>\\t<word>\\t<3 chunks after>' line is written to
    'words in instrumentalis.txt'; *text* is the chunk list from
    ``make_text``.
    """
    instr_words_dict = {}
    for n, word in enumerate(word_arr):
        for field in word[2:]:
            if re.search('ins', field):
                positions = instr_words_dict.setdefault(word[0], [])
                if n not in positions:
                    positions.append(n)
                    print(positions)
    with open('words in instrumentalis.txt', 'w', encoding='utf-8') as f:
        for word, positions in instr_words_dict.items():
            for x in positions:
                # Clamp at 0: a negative start silently emptied the left
                # context for the first three words.
                left = ''.join(text[max(x - 3, 0):x])
                # Slices stop at the end on their own; the old cap at
                # len(text) - 1 always dropped the final chunk.
                right = ''.join(text[x + 1:x + 4])
                f.write(left + '\t' + word + '\t' + right + '\n')
    return instr_words_dict
def main():
    """Analyse ``text.xml``: print the average analysis count, then write the POS and instrumental reports."""
    word_arr = get_words(open_file('text.xml'))
    print(count_average_anas(word_arr))
    count_all_pos(word_arr)
    find_all_instr(word_arr, make_text(word_arr))
+if __name__ == '__main__':
+ main()
+
+
+
+
+
+
+
+import re
def open_text_phrases(fname):
    """Split the UTF-8 file *fname* into phrases with punctuation removed.

    '...', '.' and '?' are treated like '!'.  The text after the last
    terminator is dropped.
    """
    with open (fname, 'r', encoding = 'utf-8') as f:
        text = f.read()
    unified = re.sub('\.\.\.|[\.\?]', '!', text)
    return [re.sub('[<>\*\.«»,\'\"]','', chunk).strip()
            for chunk in unified.split('!')[:-1]]
def main():
    """Print every word of ``text.txt`` as '<word>_<length>'."""
    for phrase in open_text_phrases('text.txt'):
        for w in phrase.split():
            print('{}_{}'.format(w, len(w)))
+if __name__ == '__main__':
+ main()
+
+
+
+
+
+
+
+
+
+import re
+import csv
def openforms(text):
    """Return the lower-cased whitespace tokens of *text* with edge punctuation stripped."""
    return [token.strip('.,?*()«»!\'\":; ') for token in text.lower().split()]
def freqlist(forms):
    """Return a dict mapping every item of *forms* to the number of times it occurs."""
    freqs = {}
    for form in forms:
        freqs[form] = freqs.get(form, 0) + 1
    return freqs
def freqlist_to_csv(freqs):
    """Write *freqs* to ``freq.csv`` as 'word,frequency' rows sorted by word, below a header row."""
    # newline='' is what the csv module expects; without it some platforms
    # emit an extra blank line after every row.
    with open('freq.csv', 'w', encoding='utf-8', newline='') as f:
        output = csv.writer(f, delimiter=',')
        output.writerow(['слово', 'частота'])
        output.writerows([key, freqs[key]] for key in sorted(freqs))
def agosforms(text):
    """Write every '-аго' phrase found in *text* to ``agos.txt``, one per line.

    The pattern asks for three words of left context, a word ending in
    'аго', a word ending in 'а' or 'и', and three words of right context.
    """
    pattern = '(?:(?:[А-Яа-яіѢѣЁё])+[\s,.!\?:;"\(\)\'»\n\t—]+?){3}[А-Яа-яiѢѣ]+?аго [А-Яа-яiѢѣ]+?(?:а|и)[\s,.!\?:;"\(\)\'»\n\t—]{,5}(?:[А-Яа-яiѢѣ]+?[\s,.!\?;:—"\(\)\'»\n\t]+?){3}'
    found = re.findall(pattern, text)
    with open('agos.txt', 'w', encoding='utf-8') as out:
        out.write('\n'.join(found))
def main():
    """Process 'Лесков.txt': print the token count, then write the frequency CSV and the '-аго' phrases."""
    with open ('Лесков.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()
    forms = openforms(text)
    print(len(forms))
    freqlist_to_csv(freqlist(forms))
    agosforms(text)
+if __name__ == '__main__':
+ main()
+
+
+
+
+
+import re
def main():
    """Print ``cats.txt`` with cat words and dog words swapped.

    Dog words are first wrapped in a temporary marker so that the
    cat-to-dog pass does not touch them; the markers are then turned into
    cat words.  Derived adjectives and kitten words are converted one way
    only.
    """
    with open('cats.txt', 'r', encoding = 'utf-8') as f:
        text = f.read()
    steps = (
        # The marker used to be followed by an extra space, which ended up
        # in the output before the punctuation after every dog word.
        ('([Сс]обак(?:а(?:х|ми?)?|и|е|у|о(?:й|ю))?)([\s,.!\?:"\(\)\'»\n\]\[-])', '<<<тут было слово \\1>>>\\2'),
        ('([\s,.!\?:"\(\)\'«\n-])коше?к(а(?:х|ми?)?|и|е|у|о(?:й|ю))?([\s,.!\?:"\(\)\'»\n-\]\[])', '\\1собак\\2\\3'),
        ('([\s,.!\?:"\(\)\'«\n-])Коше?к(а(?:х|ми?)?|и|е|у|о(?:й|ю))?([\s,.!\?:"\(\)\'»\n-\]\[])', '\\1Собак\\2\\3'),
        ('<<<тут было слово собак(а(?:х|ми?)?|и|е|у|о(?:й|ю))>>>', 'кошк\\1'),
        ('<<<тут было слово собак>>>', 'кошек'),
        ('<<<тут было слово Собак(а(?:х|ми?)?|и|е|у|о(?:й|ю))>>>', 'Кошк\\1'),
        ('<<<тут было слово Собак>>>', 'Кошек'),
        ('кошач(ь(?:и(?:ми?|х)?|е(?:му|го|й)|я|ю)?|ий)', 'собач\\1'),
        ('Кошач(ь(?:и(?:ми?|х)?|е(?:му|го|й)|я|ю)?|ий)', 'Собач\\1'),
        ('котята','щенята'),
        ('Котята','Щенята'),
        ('кот(?:е|ё)н(ок|ку)','щен\\1'),
        ('Кот(?:е|ё)н(ок|ку)','Щен\\1'),
    )
    for pattern, replacement in steps:
        text = re.sub(pattern, replacement, text)
    print(text)
+if __name__ == '__main__':
+ main()
+
+
+import re
def main():
    """Print every dd.mm.yy date found in ``dates.txt``, separated by spaces.

    Days 01-31 and months 01-12 are accepted.  The previous pattern
    demanded a third digit after '30'/'31', so it never matched them, and it
    let months 13-19 through.
    """
    with open('dates.txt', 'r', encoding = 'utf-8') as f:
        text = f.read()
    dates = re.findall(r'(?:0[1-9]|[12][0-9]|3[01])\.(?:0[1-9]|1[0-2])\.[0-9]{2}', text)
    print(*dates)
+if __name__ == '__main__':
+ main()
+
+
+import re
def main():
    """Print ``aphasy.txt`` lower-cased, with immediate repetitions collapsed until none remain."""
    with open('aphasy.txt', 'r', encoding = 'utf-8') as f:
        clear = f.read().lower()
    while True:
        reduced = re.sub('(\w+)(?:,?|\.*?) \\1', '\\1', clear)
        if reduced == clear:
            break
        clear = reduced
    print(clear)
+if __name__ == '__main__':
+ main()
+
+import re
def three_consonants(text):
    """Return the words of *text* that contain three consonants in a row.

    A word only counts when a delimiter follows it.
    """
    cons3 = re.findall('[^\s,.!\?:"\(\)\'«»\nйцкнгшщзхфвпрлджчсмтб]*?[йцкнгшщзхфвпрлджчсмтб]{3}[^\s,.!\?:"\(\)\'«»\nйцкнгшщзхфвпрлджчсмтб]*?[^\s,.!\?:"\(\)\'«»\n]*?[\s,.!\?:"\(\)\'»\n]', text, re.I)
    # strip() takes plain characters, not a regex: the old argument also
    # stripped the letter 's' and backslashes from the word edges.
    return [found.strip(',.!?:"()\'»\n\t ') for found in cons3]
def startwith(text):
    """Return the words of *text* that start with а/о followed by б/в (case-insensitive)."""
    abcs = re.findall(r'\b(?:а|о)(?:б|в).+?[\s,.!\?:"\(\)\'»\n]', text, re.I)
    # strip() takes plain characters, not a regex: the old argument also
    # stripped the letter 's' and backslashes from the word edges.
    return [found.strip(',.!?:"()\'»\n\t ') for found in abcs]
def proper_nouns(text):
    """Return capitalised words of *text* that follow a lower-case letter or digit and a space."""
    proper = re.findall('[а-яёa-z0-9] [А-ЯЁA-Z][а-яёa-z]+?[\s,.!\?:"\(\)\'»\n]' , text)
    # strip() takes plain characters, not a regex: the old argument also
    # stripped 's', so "Paris" came out as "Pari".
    return [found.split()[1].strip(',.!?:"()\'»\n\t ') for found in proper]
def analytical_future(text):
    """Return every 'буд… <…ать/еть/ить(ся)>' sequence found in *text* (case-insensitive)."""
    return re.findall('буд(?:е(?:шь|те?|м)|ут?) .+?(?:а|е|и)ть(?:ся)?', text, re.I)
def polysyllabic(text):
    """Return the lower-case words of *text* that have at least five vowels.

    A word only counts when a delimiter follows it.
    """
    found = re.findall(r'\b(?:[йцкнгшщзхфвпрлджчсмтб]*?[уеыаоюяиэ]){5,}[а-я]*?[\s,.!\?:"\(\)\'»\n]', text)
    return [word.strip('\s,.!\?:"\(\)\'«»\n\t ') for word in found]
def roman_num(text):
    """Return the non-empty whitespace-delimited Roman-numeral-like tokens of *text*."""
    candidates = re.findall('\sC?M*?C?D?L?C{,4}X?L?I?X{,4}I?V?I{,4}\s', text)
    stripped = (token.strip('\s,.!\?:"\(\)\'«»\n\t ') for token in candidates)
    # The pattern also matches two adjacent whitespace characters; drop those.
    return [token for token in stripped if token]
def main():
    """Read ``text.txt`` (UTF-8); nothing is done with the content yet."""
    with open('text.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()
+
+
+
+
+
+
+if __name__ == '__main__':
+ main()
+
+
+
+import re
def clean(html):
    """Return *html* after a fixed chain of substitutions, with whitespace collapsed.

    The chain removes <...> tags and {...} blocks, and replaces &...;
    entities with a space.
    NOTE(review): the first two patterns are empty and the third starts
    with ']' — markup seems to have been lost from these literals; confirm
    against the original script.
    """
    steps = (
        ('', ''),
        ('', ''),
        (']*?>[^<>]*?', ''),
        ('<[^>]*>', ''),
        ('{[^}]*}', ''),
        ('[&][^;]*;', ' '),
        (r'\s+', ' '),
    )
    text = html
    for pattern, replacement in steps:
        text = re.sub(pattern, replacement, text)
    return text
def html(text):
    """Return every ``<...>`` tag found in *text*, in order."""
    return re.findall(r'<[^>]*?>', text)
def main():
    """Write the tags of ``schizo.txt`` to ``html.txt`` and its cleaned text to ``pure.txt``."""
    with open('schizo.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()
    outputs = (('html.txt', '\n'.join(html(text))), ('pure.txt', clean(text)))
    for path, content in outputs:
        with open(path, 'w', encoding = 'utf-8') as target:
            target.write(content)
+if __name__ == '__main__':
+ main()
+
+
+
+import re
def main():
    """Ask for a phone number, report whether it fits '+7 (XXX) XXX-XX-XX' and guess the operator.

    For a number that fits the template only the bracketed prefix is
    inspected.  Otherwise the '+7' and leading '8' spellings are tried too.
    """
    given = input('Введите свой телефонный номер: ')
    strict = bool(re.search(r'\+7 \([0-9]{3}\) [0-9]{3}-[0-9]{2}-[0-9]{2}', given))
    if strict:
        print('Введённый номер совпадает с шаблоном +7 (ХХХ) ХХХ-ХХ-ХХ.')
    else:
        print('Введённый номер не совпадает с шаблоном +7 (ХХХ) ХХХ-ХХ-ХХ.')
    operators = (
        ('Это Мегафон.', r'\(9(?:2|3)', r'\+7 ?9(?:2|3)', r'8 ?9(?:2|3)'),
        ('Это МТС.', r'\(9(?:1|8)', r'\+7 ?9(?:1|8)', r'8 ?9(?:1|8)'),
        ('Это Билайн.', r'\(96', r'\+7 ?96', r'8 ?96'),
    )
    for label, bracketed, plus_seven, leading_eight in operators:
        hit = re.search(bracketed, given)
        if not hit and not strict:
            hit = re.search(plus_seven, given) or re.match(leading_eight, given)
        if hit:
            print(label)
            break
    else:
        print('Я не могу точно сказать, какой это оператор.')
+if __name__ == '__main__':
+ main()
+
+
+
+
+
+import re
def revert(dictionary):
    """Return a new dict with keys and values swapped (later duplicates win)."""
    return {value: key for key, value in dictionary.items()}
def russian_to_latin_dictionary(lines):
    """Build a Russian -> Latin dict from 'latin — russian' lines.

    Comma-separated Russian translations each get their own entry, in the
    order they are written; every key is stripped of surrounding
    whitespace.  Lines without the ' — ' separator are skipped instead of
    raising IndexError.
    """
    raw = {}
    for line in lines:
        parts = line.split(' — ')
        if len(parts) < 2:
            continue
        raw[parts[0]] = parts[1].strip('\n')
    rus_to_lat = {}
    for latin, russian in raw.items():
        # The old index loop used [i-1], which visited the last variant
        # first, and carried a no-op `i += 10`.
        for variant in russian.split(','):
            rus_to_lat[variant.strip()] = latin
    return rus_to_lat
def latin_to_russian_dictionary(lines):
    """Build a Latin -> Russian dict from 'latin — russian' lines.

    Comma-separated Latin variants each get their own entry, in the order
    they are written; every key is stripped of surrounding whitespace.
    Lines without the ' — ' separator are skipped instead of raising
    IndexError.
    """
    lat_to_rus = {}
    raw = {}
    for line in lines:
        parts = line.split(' — ')
        if len(parts) < 2:
            continue
        raw[parts[0]] = parts[1].strip('\n')
    for latin, russian in raw.items():
        # The old index loop used [i-1], which visited the last variant
        # first, and carried a no-op `i += 10`.
        for variant in latin.split(','):
            lat_to_rus[variant.strip()] = russian
    return lat_to_rus
def main():
    """Normalise the dashes and semicolons in ``latin.txt`` and print both dictionaries."""
    with open ('latin.txt', 'r', encoding = 'utf-8') as source:
        lines = source.readlines()
    lines = [re.sub(';', ',', re.sub('(?:–|−|-)', '—', line)) for line in lines]
    print(latin_to_russian_dictionary(lines))
    print(russian_to_latin_dictionary(lines))
+if __name__ == '__main__':
+ main()
+
+
+
def process(fname):
    """Return the lower-cased words of UTF-8 file *fname* with edge punctuation stripped."""
    with open (fname, 'r', encoding = 'utf-8') as source:
        lowered = source.read().lower()
    return [token.strip('.,!?*()«»\'":][><') for token in lowered.split()]
def freqlist(forms):
    """Return a dict mapping every item of *forms* to the number of times it occurs."""
    freqs = {}
    for form in forms:
        freqs[form] = freqs.get(form, 0) + 1
    return freqs
def maxfreq(frequencies):
    """Return the keys of *frequencies* that share the highest value ([] for an empty dict)."""
    if not frequencies:
        return []
    # Compute the maximum once instead of once per key.
    top = max(frequencies.values())
    return [key for key, count in frequencies.items() if count == top]
def averagefreq(frequencies):
    """Return the mean of the values of *frequencies* (0 for an empty dict)."""
    if not frequencies:
        # An empty text used to end in ZeroDivisionError.
        return 0
    return sum(frequencies.values()) / len(frequencies)
def main():
    """Print the most frequent word(s) and the mean word frequency of ``text.txt``."""
    frequencies = freqlist(process('text.txt'))
    print(*maxfreq(frequencies), '- самое частотное слово в тексте.')
    print(averagefreq(frequencies), '- средняя частота слов в тексте.')


if __name__ == '__main__':
    main()
+
+
+
+
+
+
+
+
+
+
+
+import random
def ask():
    """Interview the user on stdin.

    Returns ``[full_name, [age, food, band, dream]]``; all values are the
    raw strings typed by the user.
    """
    name = input('Как Вас зовут? ')
    surname = input('Какая у Вас фамилия? ')
    details = [
        input('Сколько Вам лет? '),
        input('Какая у Вас любимая еда? '),
        input('Какая у Вас любимая музыкальная группа? '),
        input('Какая у Вас заветная мечта? '),
    ]
    return [name + ' ' + surname, details]
def guess(database_dictionary):
    """Quiz the user about a randomly chosen person.

    *database_dictionary* maps a full name to ``[age, food, band, dream]``.
    A random hint is shown and the verdict message is returned.
    """
    person = random.choice(list(database_dictionary))
    info = database_dictionary[person]
    dream_clue = 'его/её мечта: ' + info[3]
    band_clue = 'его/её любимая музыкальная группа: ' + info[2]
    food_clue = '\nего/её любимая еда: ' + info[1]
    clue = random.choice([
        dream_clue,
        band_clue,
        dream_clue + food_clue,
        band_clue + food_clue,
    ])
    # Named ``answer`` so the local no longer shadows the function itself.
    answer = input('Угадайте, кто это (имя и фамилию)? Подсказка: ' + clue + ' ')
    if answer == person:
        return 'Правильно!'
    return 'Нет, неправильно, это - ' + person
def main():
    """Interview seven users, then run one round of the guessing game."""
    database = {}
    for _ in range(7):
        full_name, details = ask()
        database[full_name] = details
    print(guess(database))


if __name__ == '__main__':
    main()
+
+
+
+
+
+
+import re
def main():
    """Replace every form of "язык" with "шашлык" in ``Лингвистика.txt``.

    The case ending (group 1) and the character that follows the word
    (group 2) are kept.  The result is written to ``Новая лингвистика.txt``.
    """
    with open('Лингвистика.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    # Raw strings: the patterns contain regex escapes such as \s and \?.
    lang = r'язык((?:а(?:ми?|х)?)|и|о(?:в|м)|у|е)?([\s,.!\?:"\(\)\'»])'
    Lang = r'Язык((?:а(?:ми?|х)?)|и|о(?:в|м)|у|е)?([\s,.!\?:"\(\)\'»])'
    new_text = re.sub(lang, 'шашлык\\1\\2', text)
    new_text = re.sub(Lang, 'Шашлык\\1\\2', new_text)
    with open('Новая лингвистика.txt', 'w', encoding='utf-8') as f:
        f.write(new_text)


if __name__ == '__main__':
    main()
+
+
import os

# Count how often each file extension occurs below the current directory.
extension_frequency_list = {}
for root, dirs, files in os.walk('.'):
    for f in files:
        # splitext copes with names that have no dot or several dots.
        file_ext = os.path.splitext(f)[1].lstrip('.')
        if not file_ext:
            continue
        extension_frequency_list[file_ext] = extension_frequency_list.get(file_ext, 0) + 1
max_ext = max(extension_frequency_list.values(), default=0)
i = 0
for key in extension_frequency_list:
    if extension_frequency_list[key] == max_ext:
        if i == 0:
            print('The most frequent extention is \''+key+'\'. There is(are) '+str(extension_frequency_list[key])+' file(s) with it.')
            i = 1
        else:
            print('There is(are) also '+str(extension_frequency_list[key])+' \''+key+'\' file(s).')
+
+
+
+
def opentext(fname):
    """Return the lower-cased, punctuation-stripped tokens of the UTF-8 file *fname*."""
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read().lower()
    return [form.strip('.,!?*()«»\'"') for form in text.split()]
def adj_hood(fname):
    """Return the distinct words of *fname* that end in ``hood``.

    Only words longer than the suffix itself are kept; the order of first
    appearance is preserved.
    """
    hoods = []
    for word in opentext(fname):
        if len(word) > 4 and word.endswith('hood') and word not in hoods:
            hoods.append(word)
    return hoods
def count_frequency(fname, word):
    """Return how many tokens of the file *fname* equal *word*."""
    return opentext(fname).count(word)
def main():
    """Report the ``-hood`` words of a user-chosen file: count, rarest ones, stems."""
    fname = input('Введите имя файла: ')
    hoods = adj_hood(fname)
    print('В тексте встретилось', len(hoods), 'прилагательных с суффиксом -hood.')
    freq = [count_frequency(fname, hood) for hood in hoods]
    lowest = min(freq, default=0)  # computed once; safe when nothing was found
    min_freq = [hood for hood, count in zip(hoods, freq) if count == lowest]
    print('Самые редкие прилагательные с суффиксом -hood: ', ', '.join(min_freq))
    roots = [hood[0:-4] for hood in hoods]
    print('Корни прилагательных с суффиксом -hood: ', ', '.join(roots))


if __name__ == '__main__':
    main()
+
+
+
# Compare how many words of length 3 and of length 1 ``text.txt`` contains.
with open('text.txt', 'r', encoding='utf-8') as f:
    text = f.read()
# str.split() already splits on newlines, so no second pass is needed.
words = text.split()
len3 = sum(1 for word in words if len(word) == 3)
len1 = sum(1 for word in words if len(word) == 1)
if len1 == 0:
    print('В файле нет слов длины 1.')
elif len3 == 0:
    print('В файле нет слов длины 3.')
else:
    print('В файле в '+str(len3/len1)+' раз больше слов длины 3, чем слов длины 1.')
+
+
+
+import random
def _choose_line(fname):
    """Return a random non-empty line of the UTF-8 word list *fname*."""
    with open(fname, 'r', encoding='utf-8') as f:
        # Skip blank lines (e.g. the one produced by a trailing newline) so
        # that an empty "word" can never be chosen.
        lines = [line for line in f.read().split('\n') if line.strip()]
    return random.choice(lines)


def nom_noun():
    """Return a random noun in the nominative case."""
    return _choose_line('nomnouns.txt')


def acc_noun():
    """Return a random noun in the accusative case."""
    return _choose_line('accnouns.txt')


def adverb():
    """Return a random adverb."""
    return _choose_line('adverbs.txt')


def intensifier(adv):
    """Return *adv* preceded by a random intensifier."""
    return _choose_line('intensifiers.txt') + ' ' + adv


def verb_of_thought(subj):
    """Return "<subj> <thinks>, что <random clause>."."""
    return subj + ' ' + _choose_line('thoughtverbs.txt') + ', что ' + trans_verb(nom_noun(), acc_noun()) + '.'


def trans_verb(subj, obj):
    """Return a transitive clause "<subj> <intensified adverb> <verb> <obj>"."""
    return subj + ' ' + intensifier(adverb()) + ' ' + _choose_line('transverbs.txt') + ' ' + obj


def trans_verb_negative(subj, obj):
    """Return a transitive clause negated either before the adverb or before the verb."""
    modifier = intensifier(adverb())
    verb = _choose_line('transverbs.txt')
    if random.choice([True, False]):
        return subj + ' не ' + modifier + ' ' + verb + ' ' + obj
    return subj + ' ' + modifier + ' не ' + verb + ' ' + obj


def verb_of_thought_negative(subj, obj):
    """Return "<subj> не <thinks>, что <random clause>."; *obj* is unused."""
    return subj + ' не ' + _choose_line('thoughtverbs.txt') + ', что ' + trans_verb(nom_noun(), acc_noun()) + '.'


def positive():
    """Return a random affirmative sentence."""
    if random.choice([True, False]):
        return trans_verb(nom_noun(), acc_noun()) + '.'
    return verb_of_thought(nom_noun())


def question():
    """Return a random question; both variants end with a question mark."""
    if random.choice([True, False]):
        return 'зачем ' + trans_verb(nom_noun(), acc_noun()) + '?'
    # verb_of_thought() ends with a full stop; swap it for '?'.
    return 'почему ' + verb_of_thought(nom_noun()).rstrip('.') + '?'


def negative():
    """Return a random negative sentence."""
    if random.choice([True, False]):
        return verb_of_thought_negative(nom_noun(), acc_noun())
    return trans_verb_negative(nom_noun(), acc_noun())


def conditional():
    """Return "если <condition>, то <consequence>" with random polarity on both sides."""
    condition = random.choice([positive, negative])().strip('.')
    negation = random.choice([' ', ' не '])
    return 'если ' + condition + ', то ' + nom_noun() + negation + _choose_line('transverbs.txt') + ' ' + acc_noun()


def imperative():
    """Return a random imperative sentence."""
    kind = random.randrange(4)
    if kind == 0:
        return 'пусть ' + positive()
    if kind == 1:
        return 'пусть ' + negative()
    if kind == 2:
        return 'пусть ' + conditional()
    return _choose_line('imperatives.txt') + ' ' + acc_noun()


def main():
    """Print one sentence of each of the five types, in random order."""
    sentences = [positive(), question(), negative(), conditional(), imperative()]
    random.shuffle(sentences)
    for sentence in sentences:
        print(sentence.capitalize())


if __name__ == '__main__':
    main()
+
+
+
# Read three numbers, then check (1) whether a leaves remainder c when divided
# by b and (2) whether c solves the linear equation ax + b = 0.
a = float(input("Введите первое число. "))
b = float(input("Введите второе число. "))
c = float(input("Введите третье число. "))
if not b:
    print("Делите на ноль сами!")
elif a % b == c:
    print("a даёт остаток c при делении на b")
else:
    print("a не даёт остаток c при делении на b")
if (a * c) + b == 0:
    print("c является решением линейного уравнения ax + b = 0")
else:
    print("c не является решением линейного уравнения ax + b = 0")
list_list = [['l'], ['s'], ['d']]


def el_0(any_list):
    """Return the first element of *any_list*."""
    return any_list[0]


# Four equivalent ways of collecting the first element of every sub-list.
a = ' '.join([el_0(el) for el in list_list])
print(a)

b = list(map(el_0, list_list))
print(b)

b = ' '.join(list(map(el_0, list_list)))
print(b)

c = ' '.join(list(map(lambda any_list: any_list[0], list_list)))
print(c)
+
import math

# Print "yes" when the entered integer is a perfect square, "no" otherwise.
number = int(input("print any number"))
# math.isqrt is exact for arbitrarily large integers; a float square root is
# not, and a negative number would produce a complex result.
if number >= 0 and math.isqrt(number) ** 2 == number:
    print("yes")
else:
    print("no")
+
+
+
+import codecs, re
def open_file(title):
    """Return the lower-cased, punctuation-stripped tokens of the UTF-8 file *title*."""
    # A context manager closes the handle, which the original never did.
    with open(title, 'r', encoding='utf-8') as a:
        return [word.strip(' ,.?!-:;').lower() for word in a.read().split()]
def find_bigramm(words):
    """Print and return whether some bigram of *words* occurs more than twice.

    Adjacent word pairs are counted directly.  The original built a regular
    expression from each pair and searched the joined text, which broke on
    regex metacharacters and also matched inside longer words.
    """
    from collections import Counter

    counts = Counter(zip(words, words[1:]))
    found = any(count > 2 for count in counts.values())
    print(found)
    return found
def main():
    """Report whether ``text.txt`` contains a bigram that occurs more than twice."""
    find_bigramm(open_file('text.txt'))


if __name__ == "__main__":
    main()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import os, codecs
def open_file(title):
    """Return the lower-cased, punctuation-stripped tokens of the UTF-8 file *title*."""
    # A context manager closes the handle, which the original never did.
    with open(title, 'r', encoding='utf-8') as a:
        return [word.strip(' ,.?!-:;').lower() for word in a.read().split()]
def count_word_frequency(words):
    """Return a dict mapping each word of *words* to its number of occurrences."""
    freq_dict = {}
    for word in words:
        freq_dict[word] = freq_dict.get(word, 0) + 1
    return freq_dict
def find_max_keys(dict_num_values, amount):
    """Return the keys whose value is among the *amount* largest distinct values.

    Keys come back in the dict's iteration order.  The original raised
    ``ValueError`` from ``max([])`` when the dict was empty or held fewer
    than *amount* distinct values (unless the smallest value was 1); here
    all available values are used instead.
    """
    if amount <= 0:
        return []
    top_values = set(sorted(set(dict_num_values.values()), reverse=True)[:amount])
    return [key for key, value in dict_num_values.items() if value in top_values]
def extract_words_from_txt_in_folder(path):
    """Return the tokens of every ``.txt`` file found below *path*.

    The extension is taken with ``os.path.splitext`` so that names holding
    extra dots (``notes.v2.txt``) are no longer skipped.
    """
    words = []
    for root, dirs, files in os.walk(path):
        for f in files:
            if os.path.splitext(f)[1] == '.txt':
                words += open_file(os.path.join(root, f))
    return words
def main():
    """Print the words with the ten highest frequencies in the ``.txt`` files below ``.``."""
    frequencies = count_word_frequency(extract_words_from_txt_in_folder('.'))
    print(find_max_keys(frequencies, 10))


if __name__ == "__main__":
    main()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
import random

# Word-guessing quiz: every line of words.txt is "<word> <hint...>".
with open('words.txt', 'r', encoding='utf-8') as f:
    # Blank lines are dropped so they neither crash the quiz nor count
    # towards the score.
    lines = [line.strip() for line in f if line.strip()]
lenlines = len(lines)
random.shuffle(lines)
score = 0
for line in lines:
    # partition() tolerates a line that holds a word but no hint.
    word, _, hint = line.partition(' ')
    response = input('Какое слово я загадала?\n'+
                     'Подсказка: '+hint+' ')
    if response == word:
        print('Правильно!')
        score += 1
    else:
        print('Нет, слово было', word)
# Guard against an empty word list (division by zero).
percent = score / lenlines * 100 if lenlines else 0
with open('scores.txt', 'w', encoding='utf-8') as n:
    n.write('Вот результат: ')
    n.write(str(percent)+'%')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import codecs
def open_file(file_name):
    """Return the cleaned, lower-cased words of the UTF-8 file *file_name*.

    Surrounding punctuation and digits are stripped from every token and
    tokens that become empty are dropped.  The original computed the cleaned
    forms but threw them away, returning the raw tokens.
    """
    words = []
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            for token in line.split():
                word = token.strip('.,!?:;()\'\"1234567890').lower()
                if word:
                    words.append(word)
    return words
def bigramms(words):
    """Print and return whether some bigram of *words* occurs more than twice.

    The original overwrote its answer for every bigram, so the result
    depended only on the last key of the dictionary.
    """
    dic = {}
    for first, second in zip(words, words[1:]):
        bigram = first + ' ' + second
        dic[bigram] = dic.get(bigram, 0) + 1
    answer = any(count > 2 for count in dic.values())
    print(answer)
    return answer
def create_list(words):
    """Return the list of space-joined adjacent word pairs of *words*."""
    return [first + ' ' + second for first, second in zip(words, words[1:])]
# Script entry: report whether text.txt has a bigram seen more than twice.
words = open_file('text.txt')
bigramms(words)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
def main():
    """Print the numbered spacecraft names found in the text, plus their base names.

    Matches look like ``«Name-12»``.  The digit class is ``[0-9]`` so that
    numbers containing a zero (e.g. ``-10``) are matched too; the original
    ``[1-9]`` rejected them.
    """
    with open('china space programm.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    reg = r'«[А-ЯЁа-яё]+?-[0-9]+»'
    all_matches = re.findall(reg, text)
    pure_names = []
    for match in all_matches:
        pure = re.sub(r'-[0-9]+', '', match)
        if pure not in pure_names:
            pure_names.append(pure)
    all_matches += pure_names
    print(all_matches)


if __name__ == '__main__':
    main()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
def opentext(fname):
    """Return the lower-cased, punctuation-stripped tokens of the UTF-8 file *fname*."""
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read().lower()
    return [form.strip('.,?*()«»') for form in text.split()]
+
+
+
def first_letter(letter, fname = 'text.txt'):
    """Return the tokens of *fname* that start with *letter*.

    ``str.startswith`` is used so that tokens left empty after punctuation
    stripping no longer raise ``IndexError``.
    """
    return [form for form in opentext(fname) if form.startswith(letter) and form]
+
+
+
def questions():
    """Ask for a file, a letter and a number; return the matching words.

    The result holds the words of the file that start with the letter and
    are longer than the number.
    """
    fname = input('Введите имя файла: ')
    letter = input('Введите букву: ')
    number = int(input('Введите целое число: '))
    return [word for word in first_letter(letter, fname) if len(word) > number]
+
+
+
def adjectives(fname):
    """Return adjective-like words of *fname*, each with the word that follows it.

    A word counts as an adjective when it is longer than two characters and
    ends in one of the nominative endings listed below.  The last word of
    the text is returned alone because it has no successor.
    """
    # All letters here are Cyrillic.
    endings = ('ой', 'ый', 'ий', 'ая', 'яя', 'ое', 'ее')
    forms = opentext(fname)
    adj = []
    for i, form in enumerate(forms):
        if len(form) > 2 and form.endswith(endings):
            if i != len(forms) - 1:
                adj.append(form + ' ' + forms[i + 1])
            else:
                adj.append(form)
    return adj
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# Part 1: store the user's own answers, one per line, in information.txt.
with open('information.txt', 'w', encoding = 'utf-8') as n:
    for prompt in ('Как Вас зовут? ',
                   'Сколько Вам лет? ',
                   'Какой у Вас любимый цвет? ',
                   'Какой у Вас любимый исполнитель? ',
                   'Какая у Вас мечта? '):
        n.write(str(input(prompt)) + '\n')

# Part 2: quiz the user about the neighbour whose answers are stored in
# 'information about Mary.txt' (same five-line layout).
with open('information about Mary.txt', 'r', encoding = 'utf-8') as f:
    info = [line.strip() for line in f.readlines()]

# (prompt, text before the stored answer, text after the stored answer)
quiz = [
    ('Как Вашего соседа зовут? ', 'Нет, его зовут ', '.'),
    ('Сколько Вашему соседу лет? ', 'Нет, ему ', ' лет.'),
    ('Какой у Вашего соседа любимый цвет?', 'Нет, его любимый цвет - ', '.'),
    ('Какой у Вашего соседа любимый исполнитель?', 'Нет, его любимый исполнитель - ', '.'),
    ('Какая у Вашего соседа мечта?', 'Нет, его мечта - ', '.'),
]
for (prompt, before, after), expected in zip(quiz, info):
    response = input(prompt)
    if str(response) == expected:
        print('Правильно!')
    else:
        print(before + expected + after)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# Greet the user according to the country code of the entered phone number.
number = input("What's your phone number")
if '+1' in number or number.endswith("2"):
    pass  # deliberately silent for these numbers
elif '+7' in number or number.startswith('8'):
    print("Как дела в России?")
elif '+4' in number:
    print("Как дела в Англии?")
else:
    print("Как дела в мире?")
+
+
+
+
+
+
+
+
+import os, codecs
+from math import log
def preprocess(text):
    """Return the lower-cased words of *text* with punctuation removed."""
    import re  # this script never imported ``re`` at module level

    # NOTE(review): the original character class was cut off after '%' (a
    # '#' appears to have been taken for a comment start); '#' is assumed to
    # be the only missing member — confirm.
    punct = r'[.,!«»?&@"$\[\]\(\):;%#]'
    text_wo_punct = re.sub(punct, '', text.lower())
    # str.split() separates on tabs and newlines as well, so no extra
    # substitution is needed (the original pattern '\t\n' only matched a tab
    # immediately followed by a newline).
    return text_wo_punct.split()
def count_tf(word, text):
    """Return the relative frequency of *word* in the word list *text* (0 if empty)."""
    if not text:
        return 0
    return text.count(word) / len(text)
def count_df(word, texts):
    """Return the number of texts in *texts* that contain *word*."""
    return sum(1 for text in texts if word in text)
def count_idf(word, texts):
    """Return ``len(texts) / df`` for *word*, or 0 when no text contains it."""
    df = count_df(word, texts)
    if df == 0:
        return 0
    return len(texts) / df
def count_tfidf(word, text, texts):
    """Return the score ``log10(tf) * log10(idf)`` of *word* in *text*.

    NOTE(review): this is not the textbook ``tf * log(idf)`` weighting; the
    formula is kept as written.  ``log`` raises ``ValueError`` when tf or
    idf is 0, i.e. when *word* is absent from *text* or from all *texts*.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    return log(tf, 10) * log(idf, 10)
def extract_textS_from_folder(path):
    """Return one preprocessed word list per file found below *path*."""
    texts = []
    for root, dirs, files in os.walk(path):
        for f in files:
            with open(os.path.join(root, f), "r", encoding='utf-8') as t:
                # The original passed the bound method ``t.read`` instead of
                # calling it.
                content = t.read()
            texts.append(preprocess(content))
    return texts
def keywords(text, texts):
    """Return a dict of the six lowest-scoring words of *text* and their scores.

    Scores come from ``count_tfidf``; words are taken in ascending score
    order, as in the original.
    """
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    # The original sort key called the dict (``dic_tfidf(x)``) instead of
    # indexing it.
    ranked = sorted(dic_tfidf, key=lambda x: dic_tfidf[x])
    return {el: dic_tfidf[el] for el in ranked[:6]}
def main():
    """Print the keywords of every text in the ``wikipedia`` folder."""
    # The helper is spelled ``extract_textS_from_folder``; the original call
    # used a name that does not exist.
    texts = extract_textS_from_folder('wikipedia')
    for t in texts:
        kwords = keywords(t, texts)
        for key in kwords:
            print(key, kwords[key])


if __name__ == "__main__":
    main()
+
+
+
+
+import os
+import re
+from math import log
def preprocessing(text):
    """Return the lower-cased words of *text* without punctuation and digits."""
    # NOTE(review): the original character class was cut off after '%' (a
    # '#' appears to have been taken for a comment start); '#' is assumed to
    # be the only missing member — confirm.
    punct = r'[.,_!«»?&@"$/\\\[\]\(\):;%#]'
    # A character class, so that every single whitespace character is
    # replaced; the original '\n\t\s' only matched that exact sequence.
    tabs = r'\s'
    num = '[0-9]'
    text_wo_punct = re.sub(punct, '', text.lower())
    text_wo_punct = re.sub(tabs, ' ', text_wo_punct)
    text_wo_punct = re.sub(num, '', text_wo_punct)
    return text_wo_punct.strip().split()
def count_tf(word, text):
    """Return the relative frequency of *word* in the word list *text* (0 if empty)."""
    if not text:
        return 0
    return sum(1 for w in text if w == word) / len(text)
def count_df(word, texts):
    """Return the number of texts in *texts* that contain *word*."""
    return sum(1 for text in texts if word in text)
def count_idf(word, texts):
    """Return the smoothed ratio ``len(texts) / (1 + df)`` for *word*."""
    return len(texts) / (1 + count_df(word, texts))
def count_tfidf(word, text, texts):
    """Return the score ``log10(tf) * log10(idf)`` of *word* in *text*.

    NOTE(review): this is not the textbook ``tf * log(idf)`` weighting; the
    formula is kept as written.  ``log`` raises ``ValueError`` when tf or
    idf is 0.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    return log(tf, 10) * log(idf, 10)
def keywords(text, texts):
    """Return a dict of the six lowest-scoring words of *text* and their scores.

    Scores come from ``count_tfidf``; words are taken in ascending score order.
    """
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    ranked = sorted(dic_tfidf, key=lambda x: dic_tfidf[x])
    return {el: dic_tfidf[el] for el in ranked[:6]}
def main():
    """Print the keywords of every file found below the ``wikipedia`` folder."""
    texts = {}
    for root, dirs, files in os.walk('wikipedia'):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
                texts[f] = preprocessing(t.read())
    raw_texts = list(texts.values())
    for name, words in texts.items():
        print('\nИзвлекаем ключевые слова для текста "{}"'.format(name.split('.')[0]))
        kwords = keywords(words, raw_texts)
        for key in kwords:
            print(key, kwords[key])


if __name__ == '__main__':
    main()
+import re
+import os
+from math import log
+
def open_words(fname):
    """Return the lower-cased words of the UTF-8 file *fname* without punctuation."""
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read().lower()
    # NOTE(review): the original character class was cut off after '@' (a
    # '#' appears to have been taken for a comment start); '#' is assumed to
    # be the only missing member — confirm.
    punct = r'[.,?!|:;*№"\'—@#]'
    text = re.sub(punct, '', text)
    # str.split() separates on newlines and tabs as well.
    return text.split()
+
def make_freq(arr):
    """Return a dict mapping each element of *arr* to its number of occurrences."""
    d = {}
    for el in arr:
        d[el] = d.get(el, 0) + 1
    return d
+
def make_bigrams(arr):
    """Return the list of space-joined adjacent pairs of *arr*."""
    return [first + ' ' + second for first, second in zip(arr, arr[1:])]
+
def count_pmi(x, y):
    """Return the pointwise mutual information of the bigram ``x y``.

    Reads the module-level names ``word_freq``, ``words``, ``bigrams_freq``
    and ``bigrams``.  Returns 0 when the bigram or either word was never
    seen.  The original computed P(y) from the frequency of *x* and did not
    handle ``log(0)``.
    """
    p_x = word_freq.get(x, 0) / len(words)
    p_y = word_freq.get(y, 0) / len(words)
    p_xy = bigrams_freq.get(x + ' ' + y, 0) / len(bigrams)
    if not (p_x and p_y and p_xy):
        return 0
    return log(p_xy / (p_x * p_y))
+
def calculate_pmi():
    """Return a dict mapping every bigram in the global ``bigrams`` to its PMI."""
    pmis = {}
    for bigr in bigrams:
        x, y = bigr.split()
        pmis[bigr] = count_pmi(x, y)
    return pmis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
def calculate_pmi_cats(word, cathegory):
    """Return the PMI between *word* and the corpus category *cathegory*.

    *cathegory* is ``'anek'``, ``'izvest'`` or ``'teh'``.  The corpora and
    frequency tables are read from module globals that ``main`` publishes.
    Raises ``KeyError`` when *word* does not occur in the chosen category
    and ``ValueError`` for an unknown category name.
    """
    tables = {
        'anek': (freq_anek, corpus_anek_words),
        'izvest': (freq_izvest, corpus_izvest_words),
        'teh': (freq_teh, corpus_teh_words),
    }
    if cathegory not in tables:
        raise ValueError('unknown cathegory: ' + repr(cathegory))
    d, corpus = tables[cathegory]
    p_word = freq_all[word] / len(words_all)
    p_cat = 1 / 3
    p_word_cat = d[word] / len(corpus)
    return log(p_word_cat / (p_word * p_cat))


def main():
    """Assign words of the three corpora to the category with the highest PMI.

    Only words that occur in all three corpora can be scored; at most 100
    word tokens are processed.  The resulting dict is printed.
    """
    # calculate_pmi_cats() reads these names as globals; the original only
    # created them as locals of main(), so the helper could not see them.
    global words_all, freq_all, freq_anek, freq_izvest, freq_teh
    global corpus_anek_words, corpus_izvest_words, corpus_teh_words

    corpus_anek_words = []
    corpus_izvest_words = []
    corpus_teh_words = []
    for root, dirs, files in os.walk('texts'):
        if 'anekdots' in root:
            for f in files:
                corpus_anek_words += open_words(os.path.join(root, f))
        if 'teh_mol' in root:
            for f in files:
                corpus_teh_words += open_words(os.path.join(root, f))
        if 'izvest' in root:
            for f in files:
                corpus_izvest_words += open_words(os.path.join(root, f))
    words_all = corpus_anek_words + corpus_teh_words + corpus_izvest_words
    freq_anek = make_freq(corpus_anek_words)
    freq_izvest = make_freq(corpus_izvest_words)
    freq_teh = make_freq(corpus_teh_words)
    freq_all = make_freq(words_all)

    words_cathegory_dict = {}
    processed = 0  # kept outside the loop so the limit of 100 takes effect
    for w in words_all:
        if processed >= 100:
            break
        try:
            pmis = {cat: calculate_pmi_cats(w, cat) for cat in ('anek', 'izvest', 'teh')}
        except KeyError:
            continue  # the word is missing from at least one corpus
        words_cathegory_dict[w] = max(pmis, key=pmis.get)
        processed += 1
    print(words_cathegory_dict)


if __name__ == '__main__':
    main()
+
+
+
+
import shutil
import os

# Turn the words of a sentence into a chain of nested directories.
name = input('Print any sentence. ')
words = name.split()
if words:  # an empty sentence has nothing to create
    path = os.path.join(*words)
    os.makedirs(path, exist_ok=True)
+
+
+
import shutil
import os

# Create folders 1..num; folder k receives the empty files 1.txt .. k.txt.
num = int(input('Print any natural number. '))
for i in range(1, num + 1):
    name = str(i)
    os.makedirs(name, exist_ok=True)  # tolerate a re-run
    for a in range(1, i + 1):
        filename = os.path.join(name, str(a) + '.txt')
        with open(filename, 'w', encoding='utf-8') as f:
            f.write('')
+
import os
import shutil

# List the regular files of the current working directory.
filelist = [f for f in os.listdir() if os.path.isfile(f)]
print(filelist)
+
import os
import shutil

# Walk-through of common os / shutil operations on the exercise folders.
# Hard-coded location of the exercise corpus, as in the original.
TEXTS_DIR = r'C:\My\HSE\programming\HSE_programming\HSE_programming\CWs\CW13\texts'

path = os.path.abspath('.')
path2 = os.getcwd()
universalpath = os.path.join('texts', '1.txt')
# The original literal 'texts\1.txt' was not raw, so '\1' became the control
# character \x01; build the path portably instead.
exists = os.path.exists(universalpath)
exists2 = os.path.exists(os.path.join('texts', '1.txt'))
filelist = os.listdir(TEXTS_DIR)
s = 'Hello! '
i = 1
for f in filelist:
    if f.endswith('.txt'):
        # listdir() returns bare names: join them with the directory.
        with open(os.path.join(TEXTS_DIR, f), 'a', encoding='utf-8') as w:
            # NOTE(review): the original wrote ``s*1`` while still counting
            # ``i``; repeating the greeting ``i`` times is assumed — confirm.
            w.write(s * i)
        i += 1
texts = [f for f in os.listdir(TEXTS_DIR) if f.endswith('.txt')]
if not os.path.exists('ab'):
    os.mkdir('ab')
long_path = os.path.join('a', 'long', 'long', 'long', 'long', 'path')
if not os.path.exists(long_path):
    os.makedirs(long_path)
if os.path.exists('ab') and not os.path.exists('abc'):
    os.rename('ab', 'abc')
old_dir = os.path.join('a', 'long', 'long', 'long')
new_dir = os.path.join('a', 'long', 'long', 'longer')
if os.path.exists(old_dir) and not os.path.exists(new_dir):
    os.rename(old_dir, new_dir)
isfile = os.path.isfile(os.path.join('texts', '1.txt'))
isdir = os.path.isdir(os.path.join('a', 'long', 'long'))
print(os.listdir())
# The target must exist as a directory; otherwise copy() creates a *file*
# called 'newcorpus' and the move/rmtree calls below fail.
os.makedirs('newcorpus', exist_ok=True)
shutil.copy(os.path.join('texts', '1.txt'), 'newcorpus')
shutil.copytree('texts', 'corpus')
shutil.move(os.path.join('texts', '2.txt'), 'newcorpus')
os.remove(os.path.join('corpus', '2.txt'))
shutil.rmtree('newcorpus')
shutil.rmtree('a')
shutil.rmtree('abc')
+
+
+
+
import os
import shutil

# Count the file extensions found directly inside a user-chosen directory.
directory = input('Print any path working in your OS. ')
if not os.path.exists(directory):
    # Fall back to the working directory; the original then crashed because
    # ``extdict`` was only built on the other branch.
    directory = os.getcwd()
extdict = {}
# List the chosen directory (the original always listed the working directory).
for f in os.listdir(directory):
    if not os.path.isfile(os.path.join(directory, f)):
        continue
    ext = os.path.splitext(f)[1].lstrip('.')
    if ext:  # files without an extension are ignored
        extdict[ext] = extdict.get(ext, 0) + 1
print(extdict)
+
+
+import re
def main():
    """Extract the teachers entry from ``hse.html`` and append it to a text file.

    Whatever is found (or the matching error message) is appended to
    ``data about teachers.txt``; error messages are also printed.
    """
    with open('hse.html', 'r', encoding='utf-8') as f:
        text = f.read()
    # NOTE(review): the card pattern is an empty string, so it always matches
    # the empty prefix of the text and the teachers pattern can never match.
    # The original markup of both patterns seems to have been lost — confirm.
    card_reg = ''
    t_reg = 'Преподаватели(?:.|\n)*?(.+?)<'
    card_match = re.search(card_reg, text)
    if card_match:
        profs_match = re.search(t_reg, card_match.group())
        if profs_match:
            message = profs_match.group(1)
        else:
            message = 'No data about the nuber of professors found!'
            print(message)
    else:
        message = 'No card found in this article!'
        print(message)
    with open('data about teachers.txt', 'a', encoding='utf-8') as f:
        f.write(message)


if __name__ == '__main__':
    main()
+import os
+import re
def tagsaway(sentence):
    """Join the items of *sentence* with spaces after removing ``<...>`` tags.

    Every item is followed by a space, so the result ends with one.
    """
    return ''.join(re.sub(u'<.+?>', u'', word) + ' ' for word in sentence)
def get_bigramms(text):
    """Return ``[adjective, noun, sentence]`` triples for genitive ADJ+NOUN pairs.

    NOTE(review): the separator passed to ``split`` and both search patterns
    look as if their markup had been stripped from the source.
    ``str.split('')`` raises ``ValueError``, so the original literals must be
    restored before this function can run — confirm against the corpus.
    """
    bi = []
    text = text.split('')
    for i, word in enumerate(text):
        # Both substrings must be present; the original
        # ``'gr="A=' and 'gen' in word`` only ever tested the second one.
        if 'gr="A=' in word and 'gen' in word and i + 1 < len(text):
            w = text[i + 1]
            if 'gr="S,' in w and 'gen' in w:
                result1 = re.search('(.+?)', word)
                result2 = re.search('(.+?)', w)
                bi.append([result1.group(1), result2.group(1), tagsaway(text)])
    return bi
def newfile(arr):
    """Write the triples of *arr* to ``bigramms.txt``, one tab-separated line each.

    The file is overwritten.  A context manager closes it; the original
    ``f.close`` lacked the call parentheses.
    """
    with open('bigramms.txt', 'w', encoding='utf8') as f:
        for i in arr:
            f.write(i[0] + '\t' + i[1] + '\t' + i[2] + '\n')
def filework():
    """Collect the bigrams of every sentence of every file in ``news`` and save them.

    All bigrams are gathered first and written once; the original called
    ``newfile`` per sentence, and since that overwrites the file only the
    last sentence survived.

    NOTE(review): the sentence separator passed to ``split`` is an empty
    string (its markup seems to have been lost) and raises ``ValueError`` —
    restore the original delimiter.
    """
    folder = 'news'
    collected = []
    for file in os.listdir(folder):
        with open(os.path.join(folder, file)) as text:
            sentences = text.read().split('')
        for se in sentences:
            collected.extend(get_bigramms(se))
    newfile(collected)
def main():
    """Run the bigram extraction over the ``news`` folder."""
    filework()


main()
+
+
+import os
+import re
def get_author(text):
    """Return the ``content`` value of the first ``name="author"`` piece of *text*.

    *text* is an iterable of strings.  Returns ``None`` when no piece
    carries both the author marker and a ``content="..."`` attribute; the
    original raised ``AttributeError`` when ``content`` was missing.
    """
    for word in text:
        if 'name="author"' in word:
            result = re.search('content="(.+?)"', word)
            if result:
                return result.group(1)
    return None
def get_day(text):
    """Return the ``content`` value of the first ``name="created"`` piece of *text*.

    *text* is an iterable of strings.  Returns ``None`` when no piece
    carries both the marker and a ``content="..."`` attribute.
    """
    for word in text:
        if 'name="created"' in word:
            result = re.search('content="(.+?)"', word)
            if result:
                return result.group(1)
    return None
def file_inf():
    """Return ``[file_name, author, created]`` for every file in the ``news`` folder."""
    ff = []
    folder = 'news'
    for file in os.listdir(folder):
        # NOTE(review): no encoding is given, so the platform default is used.
        with open(os.path.join(folder, file)) as text:
            pieces = text.read().split('<')
        ff.append([file, get_author(pieces), get_day(pieces)])
    return ff
def newfile(arr):
    """Write the ``[name, author, date]`` rows of *arr* to ``files_info.csv``.

    Fields are separated with ';' after a header line.  Missing values
    (``None``) are written as empty fields instead of raising ``TypeError``,
    and the file is closed properly (the original ``f.close`` lacked
    parentheses).
    """
    with open('files_info.csv', 'w', encoding='utf8') as f:
        f.write('Название файла;Автор;Дата создания текста\n')
        for row in arr:
            f.write(';'.join('' if field is None else field for field in row[:3]) + '\n')
def main():
    """Collect author and date metadata of the ``news`` files and save it as CSV."""
    newfile(file_inf())


main()
+import os
+import re
def files():
    """Return a dict mapping every file name in ``news`` to its match count.

    NOTE(review): the pattern is an empty string (its markup seems to have
    been lost), so it matches at every position and the count is
    ``len(text) + 1`` rather than a number of words — restore the pattern.
    """
    ff = {}
    folder = 'news'
    for file in os.listdir(folder):
        with open(os.path.join(folder, file)) as text:
            ff[file] = len(re.findall('', text.read()))
    return ff
def newfile(dic):
    """Write ``key<TAB>value`` lines for *dic* to ``words_in_files.txt``.

    A context manager closes the file; the original ``f.close`` lacked the
    call parentheses.
    """
    with open('words_in_files.txt', 'w', encoding='utf8') as f:
        for k in dic:
            f.write(k + '\t' + str(dic[k]) + '\n')
def main():
    """Count the matches in every ``news`` file and save the table."""
    newfile(files())


main()
+import os
def findanddel(folder):
    """Delete everything inside *folder*, leaving the folder itself in place.

    The tree is walked bottom-up so that every directory is already empty
    when it is removed.  Directories are removed with ``os.rmdir``; the
    original used ``os.remove``, which fails on directories.
    """
    for root, dirs, files in os.walk(folder, topdown=False):
        for f in files:
            os.remove(os.path.join(root, f))
        for d in dirs:
            os.rmdir(os.path.join(root, d))
def main():
    """Read a folder name from stdin and empty that folder."""
    folder = input()
    findanddel(folder)


main()
+import os
def draw():
    """Print the directory tree below ``.``, indenting entries by nesting depth.

    Depth is the number of path separators in the walked root.  ``os.sep``
    replaces the hard-coded backslash, which only worked on Windows.
    """
    for root, dirs, files in os.walk('.'):
        indent = '\t' * root.count(os.sep)
        for d in dirs:
            print(indent, '--', d)
        for f in files:
            print(indent, f)
def main():
    """Print the directory tree of the working directory."""
    draw()


main()
def pointsaway(file):
    """Split the string *file* into lower-cased words without surrounding punctuation."""
    import string

    # NOTE(review): the original strip set was cut off after '.,?!()*&^%$'
    # (a '#' appears to have been taken for a comment start).  All ASCII
    # punctuation is stripped here, which includes every character that is
    # still visible — confirm.
    return [word.strip(string.punctuation).lower() for word in file.split()]
def words(file):
    """Return a dict mapping each word of the list *file* to its number of occurrences."""
    slova = {}
    for word in file:
        slova[word] = slova.get(word, 0) + 1
    return slova
def creation(dic):
    """Write *dic* to ``file.tsv`` as ``key<TAB>value`` lines sorted by key."""
    with open('file.tsv', 'w', encoding='utf8') as f:
        for key in sorted(dic):
            f.write(key + '\t' + str(dic[key]) + '\n')
def main():
    """Build the word-frequency table of ``file.txt`` and save it as ``file.tsv``."""
    with open('file.txt', 'r', encoding='utf8') as f:
        file = f.read()
    text = pointsaway(file)
    semua = words(text)
    creation(semua)


main()
def pointsaway(file):
    """Split the string *file* into lower-cased words without surrounding punctuation."""
    import string

    # NOTE(review): the original strip set was cut off after '.,?!()*&^%$'
    # (a '#' appears to have been taken for a comment start).  All ASCII
    # punctuation is stripped here, which includes every character that is
    # still visible — confirm.
    return [word.strip(string.punctuation).lower() for word in file.split()]
def creation(text):
    """Write each distinct word of *text* with its last index to ``new.txt``.

    Lines are ``word<TAB>index`` and sorted by word.
    """
    dic = {word: index for index, word in enumerate(text)}
    with open('new.txt', 'w', encoding='utf8') as f:
        for word in sorted(dic):
            f.write('{}\t{}\n'.format(word, str(dic[word])))
def main():
    """Index the words of ``file.txt`` and save the table to ``new.txt``."""
    with open('file.txt', 'r', encoding='utf8') as f:
        file = f.read()
    creation(pointsaway(file))


main()
+import re
def get_word(word):
    """Return the first run of word characters in *word*, or ``None`` if there is none.

    NOTE(review): the pattern probably had markup around the group that was
    lost; as written it simply finds the first ``\\w+`` run.
    """
    result = re.search(r'.*?(\w+)', word)
    return result.group(1) if result else None
def find_ins(text):
    """Return ``{index: line}`` for the lines of *text* tagged as instrumental nouns.

    A line qualifies when it contains both ``gr="S`` and ``ins``.
    """
    return {i: word for i, word in enumerate(text) if 'gr="S' in word and 'ins' in word}
def newfile(words, text):
    """Write a three-word context line for every entry of *words* to ``ins.txt``.

    *words* maps an index into *text* to the matching line.  For each entry
    up to three words to the left and to the right (as extracted by
    ``get_word``) are collected and written as ``left<TAB>word<TAB>right``.

    Fixes over the original: the buffer is reset for every entry, one line
    is written per entry, the scan stops at both ends of *text* instead of
    wrapping around or raising ``IndexError``, and the file is closed.
    """
    def _context(start, step):
        """Collect up to three words walking from *start* in direction *step*."""
        found = []
        idx = start + step
        while len(found) < 3 and 0 <= idx < len(text):
            neighbour = get_word(text[idx])
            if neighbour is not None:
                found.append(neighbour)
            idx += step
        return found

    with open('ins.txt', 'w', encoding='utf8') as f:
        for k in words:
            left = ' '.join(reversed(_context(k, -1)))
            right = ' '.join(_context(k, 1))
            centre = get_word(words[k]) or ''
            f.write(left + '\t' + centre + '\t' + right + '\n')
def main():
    """Extract instrumental-noun contexts from the annotated XML corpus."""
    # Hard-coded input location, as in the original.
    with open('/home/woods/Загрузки/text.xml', 'r', encoding='utf8') as f:
        text = f.read().split('\n')
    ss = find_ins(text)
    newfile(ss, text)


main()
+import re
def find_and_count(file):
    """Count the part-of-speech tags found in the lines of *file*.

    Every line is split on '<'; in each piece the word characters right
    after ``gr="`` are taken as the tag.  Each tag is printed as it is
    found, and a dict ``{tag: count}`` is returned.
    """
    pos = {}
    for word in file:
        for part in word.split('<'):
            result = re.search(r'.*?gr="(\w+)', part)
            if result:
                tag = result.group(1)
                print(tag)
                pos[tag] = pos.get(tag, 0) + 1
    return pos
def newfile(dic):
    """Write ``tag<TAB>count`` lines for *dic* to ``pos.txt``.

    A context manager closes the file; the original ``f.close`` lacked the
    call parentheses.
    """
    with open('pos.txt', 'w', encoding='utf8') as f:
        f.write(''.join(k + '\t' + str(dic[k]) + '\n' for k in dic))
def main():
    """Count part-of-speech tags in the annotated XML corpus and save them."""
    # Hard-coded input location, as in the original.
    with open('/home/woods/Загрузки/text.xml', 'r', encoding='utf8') as f:
        text = f.read().split('\n')
    newfile(find_and_count(text))


main()
+import re
+
def find_w(file):
    """Return the number of matches of the word pattern in the string *file*.

    NOTE(review): the pattern is an empty string (its markup seems to have
    been lost), so the result is ``len(file) + 1`` rather than a word count
    — restore the pattern.
    """
    return len(re.findall('', file))
+def find_ana (file):
# NOTE(review): this function cannot run as written. It reads `line`, `a`
# and `types`, none of which is defined here (the parameter `file` is never
# used), and it asks for group(2) although the visible pattern has a single
# group. The pattern looks as if its markup had been stripped. Presumably
# the intent was to collect the distinct analysis types into a dict with
# zero counts — confirm against the assignment before rewriting.
+ anas = re.findall('(.+?)', line)
+ if a:
+ if a.group(2) not in types:
+ types[a.group(2)] = 0
+ return types
def countthem(file, types):
    """Store, for every key of *types*, the number of lines quoting that key.

    A line counts when it contains the key wrapped in double quotes.
    *types* is updated in place and returned.
    """
    for el in list(types):
        quoted = '"' + el + '"'
        types[el] = sum(1 for line in file if quoted in line)
    return types
def newfile(types):
    """Write ``key-count`` lines for *types* to ``adj.txt``."""
    with open('adj.txt', 'w', encoding='utf8') as f:
        f.write(''.join(key + '-' + str(types[key]) + '\n' for key in types))
+def main():
# NOTE(review): `filework` and `findthem` are not defined in this script
# (its helpers are named find_w, find_ana, countthem and newfile), so this
# entry point raises NameError as written. Presumably it should read the
# corpus, collect the analysis types, count them and save the table —
# confirm the intended helper names.
+ text = filework()
+ dic = findthem(text)
+ dic = countthem (text, dic)
+ newfile(dic)
+main()
+import random
# Shared handle on the vocabulary; the script closes it at its very end.
file = open('file_6.6.txt', 'r')


def _pick_word(tag):
    """Return a random first field among the vocabulary lines containing `` tag ``."""
    marker = ' ' + tag + ' '
    candidates = [line.split(' ')[0] for line in file if marker in line]
    file.seek(0, 0)  # rewind the shared handle for the next caller
    return random.choice(candidates)


def noun():
    """Return a random noun (vocabulary lines tagged ``n``)."""
    return _pick_word('n')


def pronoun():
    """Return a random pronoun (vocabulary lines tagged ``pn``)."""
    return _pick_word('pn')


def verb():
    """Return a random verb (vocabulary lines tagged ``v``)."""
    return _pick_word('v')


def adjective():
    """Return a random adjective (vocabulary lines tagged ``adj``)."""
    return _pick_word('adj')
def suborob(n, adj, pn):
    """Return a random subject/object phrase.

    The first coin flip picks the bare pronoun; otherwise a second flip
    chooses between "noun adjective" and "noun pronoun".
    """
    if random.randint(0, 1) == 0:
        return pn
    modifier = adj if random.randint(0, 1) == 0 else pn
    return n + ' ' + modifier
def declarative(subj, v, obj):
    """Return "<Subject> <verb> <object>." with the subject capitalised."""
    words = (subj.capitalize(), v, obj)
    return ' '.join(words) + '.'
def question(subj, v):
    """Return a random question: "Apa <subj> <v>?" or "Siapa <v>?"."""
    ask_about_subject = random.randint(0, 1) == 0
    if ask_about_subject:
        return f'Apa {subj} {v}?'
    return f'Siapa {v}?'
def negative(subj, v, obj):
    """Return a random negative sentence using "tidak" (with verb) or "bukan"."""
    use_tidak = random.randint(0, 1) == 0
    if use_tidak:
        return f'{subj.capitalize()} tidak {v} {obj}.'
    return f'{subj.capitalize()} bukan {obj}.'
def imperative(v, obj):
    """Return a random command: "<Verb> <obj>!" or "Jangan <verb> <obj>!"."""
    positive = random.randint(0, 1) == 0
    if positive:
        return f'{v.capitalize()} {obj}!'
    return f'Jangan {v} {obj}!'
def conditional(subj1, v1, obj1, subj2, v2, obj2):
    """Return "Kalau <clause 1>, <clause 2>." built from two subject-verb-object triples."""
    first_clause = ' '.join((subj1, v1, obj1))
    second_clause = ' '.join((subj2, v2, obj2))
    return 'Kalau ' + first_clause + ', ' + second_clause + '.'
def sequence():
    """Return the numbers 1..5 in random order, each exactly once.

    Uses ``random.sample`` instead of drawing with replacement and retrying
    on duplicates, which needed an unbounded number of draws.
    """
    return random.sample(range(1, 6), 5)
+def text():
+ seq=sequence()
+ for i in range (5):
+ if seq[i]==1:
+ print(declarative(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun())))
+ elif seq[i]==2:
+ print (question(suborob(noun(), adjective(), pronoun()), verb()))
+ elif seq[i]==3:
+ print (negative(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun())))
+ elif seq[i]==4:
+ print (imperative(verb(), suborob(noun(), adjective(), pronoun())))
+ else:
+ print (conditional(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()), suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun())))
+text()
+file.close()
+import re
+def search (text):
+ otr = re.search ('\nОтряд: | \n(.+?) | \n ', text)
+ if otr:
+ result = otr.group(3)
+ return result
+def main ():
+ f = open('file_10.6.html', 'r', encoding = 'utf8')
+ file = f.read()
+ f.close()
+ ans = search (file)
+ print (ans)
+main()
+def pointsaway (file):
+ file = file.replace('?!', '.')
+ file = file.split('.')
+ for i, word in enumerate (file):
+ file[i] = file[i].replace('.,?!()*&^%$
+ file[i] = file[i].replace('-- ', ' ')
+ file[i] = file[i].lower()
+ return file
+def tenplus (text):
+ for sentence in text:
+ sentence = sentence.split()
+ n=0
+ s=0
+ for word in sentence:
+ word = word.strip('.,?!()*&^%$
+ s+=len(word)
+ n+=1
+ if n>10:
+ print ("Это предложение со словами длины %s"%(str(round(s/n, 1))))
+def main ():
+ f = open ("file_12.6.txt", "r", encoding = "utf8")
+ file = f.read()
+ f.close()
+ text = pointsaway (file)
+ tenplus (text)
+main ()
+import random
def intothedic(file):
    """Build a ``{first field: second field}`` dict from ``;``-separated lines.

    Newline characters are stripped from the ends of every field. Lines with
    fewer than two fields (for example a blank trailing line) are skipped;
    previously they raised ``IndexError``.

    Args:
        file: iterable of text lines.

    Returns:
        dict mapping the first field of each line to its second field.
    """
    dic = {}
    for line in file:
        fields = [field.strip('\n') for field in line.split(';')]
        if len(fields) < 2:
            continue
        dic[fields[0]] = fields[1]
    return dic
def youchoose(dic):
    """Return one key of ``dic`` chosen at random."""
    return random.choice(list(dic))
+def thegameison (noun, dic):
+ for key in dic:
+ if key == noun:
+ hint = dic[key]
+ n=key
+ break
+ print (hint, '...')
+ for i in range (3):
+ if input() == n:
+ print ('Победа!')
+ break
+ else:
+ if i == 0:
+ print ('Ещё 2 попытки')
+ continue
+ elif i == 1:
+ print ('Ещё 1 попытка')
+ continue
+ else:
+ print ('GAME OVER')
+f = open('file_8.6.csv', 'r', encoding = 'utf8')
+file = f.readlines()
+f.close()
+words = intothedic(file)
+word = youchoose(words)
+thegameison(word, words)
+import re
+def pointsaway (file):
+ file = file.split()
+ for i, word in enumerate (file):
+ file[i] = file[i].strip('.,?!()*&^%$
+ file[i] = file[i].lower()
+ return file
def findverbs(file):
    """Return the distinct words that contain a form of the verb "загрузить".

    Args:
        file: iterable of words.

    Returns:
        list of matching words in order of first appearance, no duplicates.
    """
    # Compiled once; the search is unanchored, so prefixed forms match too.
    pattern = re.compile('загру(з(и.*|ят.*)|ж(у.*|ен.*))')
    seen = set()
    verbs = []
    for word in file:
        if word not in seen and pattern.search(word) is not None:
            seen.add(word)
            verbs.append(word)
    return verbs
+f = open ('file_9.6.txt', 'r', encoding = 'utf8')
+file = f.read()
+f.close()
+file = pointsaway(file)
+verbs = findverbs(file)
+print (verbs)
+def filework ():
+ f = open('corpus.txt', 'r', encoding = 'utf8')
+ file = f.readlines()
+ f.close()
+ return file
def newfile(text):
    """Write the number of items in ``text`` to ``lines.txt`` (UTF-8)."""
    # The original referenced ``f.close`` without calling it, so the file
    # was never explicitly closed; the context manager guarantees it.
    with open('lines.txt', 'w', encoding='utf8') as f:
        f.write(str(len(text)))
+def main():
+ text = filework()
+ newfile(text)
+main()
+import os
def names(array):
    """Return the entries of ``array`` that are paths to existing regular files."""
    return [name for name in array if os.path.isfile(name)]
def haspoints(array):
    """Print how many names in ``array`` contain any of the characters ``,!_-``."""
    points = ',!_-'
    su = sum(1 for name in array if any(c in points for c in name))
    print("Знаки препинания есть в названии такого количества файлов: ", su)
+def main():
+ files = names (os.listdir('.'))
+ print (os.listdir('.'))
+ haspoints (files)
+main()
+a=input ('Введите число ')
+for i in range (1, 11):
+ print (i, '*', a, '=', i*int(a))
+xs=[]
+for i in range (7):
+ xs.append(int(input()))
+for i in range (7):
+ if xs[i]>0:
+ for j in range (xs[i]):
+ print ('x', end='')
+ print ('\n')
+ else:
+ print ('\n')
+word=input()
+l=len(word)
+while l>0:
+ word=list(word)
+ x=word.pop(0)
+ print (''.join(word))
+ l=len(word)
+def pointsaway (file):
+ file = file.split()
+ for i, word in enumerate (file):
+ file[i] = file[i].strip('.,?!()*&^%$
+ file[i] = file[i].lower()
+ return file
def findomni(file):
    """Pair every word starting with "omni" with the same word minus that prefix.

    Only the leading "omni" is removed; the original used ``str.replace``,
    which also deleted any later "omni" inside the word.

    Args:
        file: iterable of words.

    Returns:
        list of ``[word, word_without_prefix]`` pairs, in input order.
    """
    prefix = 'omni'
    return [[word, word[len(prefix):]] for word in file if word.startswith(prefix)]
def findwords(array, file):
    """Print, for each pair in ``array``, how often both members occur in ``file``.

    Output format per pair: ``<first> <count> - <second> <count>``.
    """
    for pair in array:
        first_hits = sum(1 for word in file if word == pair[0])
        second_hits = sum(1 for word in file if word == pair[1])
        print(pair[0], first_hits, '-', pair[1], second_hits)
+def main ():
+ name = input('Введите имя файла ')
+ f = open (name, "r")
+ file = f.read()
+ f.close()
+ file = pointsaway (file)
+ findwords(findomni(file), file)
+main()
+import re
+def filework():
+ f = open('corpus.txt', 'r', encoding = 'utf8')
+ file = f.readlines()
+ f.close()
+ return file
+def findthem (file):
+ types = {}
+ for line in file:
+ a = re.search('(.+?)', line)
+ if a:
+ if a.group(2) not in types:
+ types[a.group(2)] = 0
+ return types
def countthem(file, types):
    """Fill ``types`` with the number of lines mentioning each key in quotes.

    For every key, counts the lines of ``file`` that contain the key wrapped
    in double quotes (``"key"``) and stores that count under the key.

    Args:
        file: re-iterable collection of lines (it is scanned once per key).
        types: dict whose keys are the strings to look for; updated in place.

    Returns:
        The same ``types`` dict, with every value replaced by its line count.
    """
    # Snapshot the keys so the dict can be updated while we go through them.
    for key in list(types):
        needle = '"' + key + '"'
        types[key] = sum(1 for line in file if needle in line)
    return types
+def newfile (types):
+ s = ''
+ f = open('types.txt', 'w', encoding = 'utf8')
+ for key in types:
+ s = s + key + '\n'
+ f.write(s)
+ f.close()
+def main():
+ text = filework()
+ dic = findthem(text)
+ dic = countthem (text, dic)
+ newfile(dic)
+main()
+capital='АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
+s=0
+cap=0
+f=open('file_5.6.txt', 'r', encoding='utf8')
+file=f.readlines()
+l=len(file)
+for i in range (0, l):
+ file[i]=file[i].split(' ')
+for i in range (0, l):
+ m=len(file[i])
+ for j in range (0, m):
+ if file[i][j][0] in capital:
+ cap+=1
+ s+=1
+print ((cap/s)*100)
+f.close()
+import xml.etree.ElementTree as a
+from os import walk
def sent(filename):
    """Return the number of ``<se>`` elements in ``./news/<filename>``.

    The original ignored its argument and read the module-level loop
    variable ``file``, so it only worked when called from that loop.

    Args:
        filename: name of an XML file inside the ``./news`` directory.
    """
    tree = a.parse('./news/' + filename)
    return len(tree.getroot().findall('.//se'))
def move(res, filename):
    """Write every string in ``res`` to ``filename``, one per line.

    Args:
        res: iterable of strings.
        filename: path of the file to (over)write.
    """
    # The original never closed the handle, so buffered lines could be lost.
    with open(filename, 'w') as res_file:
        res_file.writelines(item + '\n' for item in res)
+f = []
+words = []
+p = './news';
+for (dirpath, dirnames, filenames) in walk(p):
+ f.extend(filenames)
+ break
+for file in f:
+ words.append(file+'\t'+str(sent(file)))
+move(words,'counted.txt')
# Collect the entered words that end in "tur" until an empty line is typed,
# then print them one per line.
n = []
while True:
    word = input('word:')
    if word == '':
        break
    elif word.endswith('tur'):
        n.append(word)
# '\n' is the newline escape; the original joined with the two characters '/n'.
print('\n'.join(n))
+d={'Germany':'Berlin','France':'Paris',
+ 'USA':'Washington DC',
+ 'Russia':'Moscow'}
+for key in d:
+ print (key+"*"+d[key])
+def capital(a):
+ a=input("Country:")
+ if a in d:
+ return (d[a])
+ else: print ("oops")
+def revert ():
+ k={}
+ countries=d.keys()
+ for key in countries:
+ k[d[key]]=key
+ return (k)
+a=revert()
+print(a)
+
+with open ('fr.txt', 'r', encoding = 'utf-8') as a:
+ text=a.readlines()
+ for line in text:
+ if 'союз' in line:
+ print (line)
+n = input("WORD: ")
+m = len(n)//2
+print (n[:m], n[:m-1:(-1)])
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import os
+import shutil
+name=input ('напишите што-нибудь')
+f_name=name.replace(' ', '\\')
+os.makedirs(f_name)
+
+
+
+
+
+n = input ('word:')
+for i in range (len(n)):
+ print(n [-i-1::])
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
def open_html(path='xenokeryx.html'):
    """Return the full text of the UTF-8 file at ``path``.

    The original used a string literal as the parameter, which is a syntax
    error; the same file name is kept as the default.

    Args:
        path: file to read.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
+def find_links (content):
+ reg=r'(.*?)'
+ links=re.findall (reg, content)
+ return links
+text=open_html ('xenokeryx.html')
+links=find_links(text)
+for link in links [:20]:
+ print (link[1], '-->', link[0])
+a=open (input(), 'r', encoding='utf-8')
+b=0
+c=0
+for line in a:
+ arr=line.split()
+ b=b+len(arr)
+ for d in arr:
+ if len(d)>10:
+ c=c+1
+ a.close()
+ e=c/b*100
+ print (e, '%')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import os
+def sup():
+ for root,dirs,files in os.walk('.'):
+ num=root.count('\\')
+ root+ root.split('\\')[-1]
+ print ('\t'*(num), root, sep='--')
+ for f in files:
+ print ('\t'*(num+1), f)
+sup()
def opentext(path='file.txt'):
    """Return the whitespace-separated words of a UTF-8 file, punctuation stripped.

    The characters ``.,?!:;()`` are removed from both ends of every word.
    The original did not parse: it used ``file.txt`` as a parameter name and
    passed the strip characters without quotes.

    Args:
        path: file to read.

    Returns:
        list of stripped words, in file order.
    """
    with open(path, 'r', encoding='utf-8') as a:
        text = a.read()
    return [form.strip('.,?!:;()') for form in text.split()]
+def word ():
+ a=opentext(file.txt)
+ b=[]
+ for i in range (len(a)):
+ if a[i][-1]=='s':
+ if a[i][-2]=='u':
+ if a[i][-3]=='o':
+ b.append(a[i])
+ print (b)
+ c=b.split()
+ d=str.count(c)
+ return d
+import random
+def noun ():
+ file=open('Mnoun.txt', 'r', encoding='utf-8')
+ f=readlines()
+ nouns=[]
+ for line in f:
+ nouns.append(line.split(" "))
+ return random.choise(nouns)
+def verb ():
+ file=open ('verb1.txt', 'r', encoding='utf-8')
+ f=readlines()
+ verbs=[]
+ for line in f:
+ verbs.append(line.split(" "))
+ return random.choise(verbs)
+def adj ():
+ file=open ('adj.txt', 'r', encoding='utf-8')
+ f=readlines()
+ adjectives=[]
+ for line in f:
+ adjectives.append(line.split(" "))
+ return random.choise(adjectives)
+def noun2 ():
+ file=open ('noun2.txt','r', encoding='utf-8')
+ f=readlines()
+ plnouns=[]
+ for line in f:
+ plnouns.append(line.split(" "))
+ return random.choise(plnouns)
def conj():
    """Return a comma followed by a randomly chosen Russian conjunction."""
    conjs = ["и", "или", "но", "да", "однако", "зато", "когда", "пока", "потому что", "чтобы", "то есть"]
    # ``random.choise`` does not exist; the function is ``random.choice``.
    return "," + random.choice(conjs)
+def noun3 ():
+ file=open ('noun3.txt', 'r', encoding='utf-8')
+ f=readlines ()
+ fnouns=[]
+ for line in f:
+ fnouns.append (line.split(" "))
+ return random.choise(fnouns)
+def 2verb ():
+ file=open ('2verb.txt', 'r', encoding ='utf-8')
+ f=readlines ()
+ 2verbs=[]
+ for line in f:
+ 2verbs.append (line.split(" "))
+ return random.choise (2verbs)
+def sen ():
+ return (noun+" "+verb+" "+adj+" "+noun2+" "+conj+" "+noun3+" "+2verb+"."
+for i in range(5):
+ print (sen())
+import xml.etree.ElementTree as a
+from os import walk
def sent(filename):
    """Return the number of ``<se>`` elements in ``./news/<filename>``.

    The original ignored its argument and read the module-level loop
    variable ``file``, so it only worked when called from that loop.

    Args:
        filename: name of an XML file inside the ``./news`` directory.
    """
    tree = a.parse('./news/' + filename)
    return len(tree.getroot().findall('.//se'))
def move(res, filename):
    """Write every string in ``res`` to ``filename``, one per line.

    Args:
        res: iterable of strings.
        filename: path of the file to (over)write.
    """
    # The original never closed the handle, so buffered lines could be lost.
    with open(filename, 'w') as res_file:
        res_file.writelines(item + '\n' for item in res)
+f = []
+words = []
+p = './news';
+for (dirpath, dirnames, filenames) in walk(p):
+ f.extend(filenames)
+ break
+for file in f:
+ words.append(file+'\t'+str(sent(file)))
+move(words,'counted.txt')
+import xml.etree.ElementTree as a
+from os import walk
+import pandas as q
def second(filename):
    """Return ``"<author>:<topic>"`` read from ``./news/<filename>``.

    Looks up the first element with ``name='author'`` and the first with
    ``name='topic'`` and joins their ``content`` attributes with a colon.
    The original ignored its argument and read the module-level loop
    variable ``file``.

    Args:
        filename: name of an XML file inside the ``./news`` directory.

    Raises:
        AttributeError: if either element is missing (``find`` returns None).
        KeyError: if a found element has no ``content`` attribute.
    """
    root = a.parse('./news/' + filename).getroot()
    name = root.find(".//*[@name='author']")
    topic = root.find(".//*[@name='topic']")
    return name.attrib['content'] + ":" + topic.attrib['content']
+f = []
+d = []
+p = './news';
+for (dirpath, dirnames, filenames) in walk(p):
+ f.extend(filenames)
+for file in f:
+ tmp = second(file).split(':')
+ tmp_arr = [file,tmp[0],tmp[1]]
+ d.append(tmp_arr)
+df = q.DataFrame(d,columns=["название","автор","тема"])
+df.to_csv("2.csv", sep=';', encoding='windows-1251')
+import re
+def main ():
+ with open('F.xml', 'r', encoding='utf-8') as f:
+ text=f.readlines()
+ return text
+n=main ()
+m=len(n)
+print (m)
+import re
+dic = {}
+with open('f.xml') as f:
+ for row in f:
+ if(re.match(r'.*',row)):
+ arr = row.split("\"")
+ key = arr[3]
+ if key in dic:
+ dic[key]=dic[key]+1
+ else:
+ dic[key] = 1;
+ for key in dic.items():
+ print(key+" "+"\r\n")
+import os
+import shutil
+folder='.'
+print (os.listdir('.'))
+for f in os.listdir('.'):
+ with open (os.path.join(folder, f)) as text:
+ print('file: ', f)
+a=str_word_count(f, ' ')
+filelist = [f for f in os.listdir('.') if os.path.isfile(f)]
+if a>1:
+ print(filelist)
+def open_format(crab):
+ a = []
+ with open (crab.txt, 'r', encoding = 'utf-8') as f:
+ text = f.read()
+ text = re.sub('\.\.\.|[\.\?]', '!', text)
+ a = text.split('!')[:-1]
+ for i in range(len(a)):
+ a[i] = re.sub('[<>\*\.«»,\'\"]','', a[i])
+ a[i] = a[i].strip()
+ return a
+def repeat():
+ work=open_format (crab.txt)
+ words=re.findall(r'([a-zA-Z]+(?:[?:[\'-][a-zA-Z]+)*)',s)
+ res=[]
+ for x in range (a,z):
+ res.append (x)
+ print (res)
+import os
+os.listdir('.')
+file_tree=os.walk('.')
+names = {}
+for root, dirs, files in os.walk('.'):
+ for f in files:
+ name = f.split('.')[0]
+ if name not in names:
+ names[name]=1
+ print(len(names))
+import random
+def noun ():
+ file=open('Mnoun.txt', 'r', encoding='utf-8')
+ f=readlines()
+ nouns=[]
+ for line in f:
+ nouns.append(line.split(" "))
+ return random.choise(nouns)
+def verb ():
+ file=open ('verb1.txt', 'r', encoding='utf-8')
+ f=readlines()
+ verbs=[]
+ for line in f:
+ verbs.append(line.split(" "))
+ return random.choise(verbs)
+def adj ():
+ file=open ('adj.txt', 'r', encoding='utf-8')
+ f=readlines()
+ adjectives=[]
+ for line in f:
+ adjectives.append(line.split(" "))
+ return random.choise(adjectives)
+def noun2 ():
+ file=open ('noun2.txt','r', encoding='utf-8')
+ f=readlines()
+ plnouns=[]
+ for line in f:
+ plnouns.append(line.split(" "))
+ return random.choise(plnouns)
def conj():
    """Return a comma followed by a randomly chosen Russian conjunction."""
    conjs = ["и", "или", "но", "да", "однако", "зато", "когда", "пока", "потому что", "чтобы", "то есть"]
    # ``random.choise`` does not exist; the function is ``random.choice``.
    return "," + random.choice(conjs)
+def noun3 ():
+ file=open ('noun3.txt', 'r', encoding='utf-8')
+ f=readlines ()
+ fnouns=[]
+ for line in f:
+ fnouns.append (line.split(" "))
+ return random.choise(fnouns)
+def 2verb ():
+ file=open ('2verb.txt', 'r', encoding ='utf-8')
+ f=readlines ()
+ 2verbs=[]
+ for line in f:
+ 2verbs.append (line.split(" "))
+ return random.choise (2verbs)
+def sen ():
+ return (noun+" "+verb+" "+adj+" "+noun2+" "+conj+" "+noun3+" "+2verb+"."
+for i in range(5):
+ print (sen())
+import re
+def main():
+ with open ('lemon.html', 'r', encoding='utf-8') as f:
+ text=f.read()
+ a='+? '
+ if re.search (a, text):
+ card = re.search(a, text).group()
+ b='Семейство(?:.|\n)*?(.+?)'
+ if re.search(b, a):
+ с = re.search(b, a).group(1)
+ with open ('family.txt', 'a', encoding = 'utf-8') as f:
+ f.write(с)
+ else:
+ print('Family type not found.')
+ with open ('family.txt', 'a', encoding = 'utf-8') as f:
+ f.write('Family type not found.')
+ else:
+ print('Error!')
+ with open ('family.txt', 'a', encoding = 'utf-8') as f:
+ f.write('Error!')
+import os
+import re
+def count_words():
+ with open('answer1.txt', 'w', encoding='utf-8') as fout:
+ for root, dirs, files in os.walk('./news'):
+ for f in files:
+ count = 0
+ with open(os.path.join(root, f), 'r') as fin:
+ f1 = fin.read().split()
+ for line in f1:
+ if '' in line:
+ count += 1
+ fout.write('%s \t %d \n' %(f, count))
+def annot():
+ with open('answer2.csv', 'w', encoding='utf-8') as fout:
+ fout.write('Название файла \t Автор \t Дата создания')
+ for root, dirs, files in os.walk('./news'):
+ for f in files:
+ with open(os.path.join(root, f), 'r') as fin:
+ f2 = fin.read()
+ nam = f
+ reg1 = ''
+ reg2 = ''
+ auth = re.search(reg1, f2).group(1)
+ date = re.search(reg2, f2).group(1)
+ fout.write('%s \t %s \t %s \n' %(f, auth, date))
+def bigramms():
+ with open('answer3.txt', 'w', encoding='utf-8') as fout:
+ for root, dirs, files in os.walk('./news'):
+ for f in files:
+ with open(os.path.join(root, f), 'r') as fin:
+ f3 = fin.read().split('\n')
+ reg = '(.+?)'
+ for indx, sentence in enumerate(f3):
+ if '' in sentence:
+ f3[indx] = [re.search(reg, sentence).group(1), re.search(reg, sentence).group(2)]
+ else:
+ f3.remove(sentence)
+ temp = True
+ for indx, word in enumerate(f3):
+ try:
+ if 'A' in word[0]:
+ if 'gen' in word[0]:
+ if 'S' in f3[indx + 1][0]:
+ if 'gen' in f3[indx + 1][0]:
+ fout.write('%s %s \n' %(word[1], f3[indx + 1][1]))
+ except IndexError:
+ temp = False
+def main():
+ count_words()
+ annot()
+ bigramms()
+if __name__ == '__main__':
+ main()
+def done_text():
+ f = open('ostin.txt', 'r', encoding='utf-8')
+ s = f.read().lower().split()
+ f.close()
+ for indx, word in enumerate(s):
+ s[indx] = word.strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><')
+ return s
def count_words(arr):
    """Return a dict mapping each item of ``arr`` to its number of occurrences."""
    d = {}
    for word in arr:
        d[word] = d.get(word, 0) + 1
    return d
def count_letters(arr):
    """Count words by first letter over the lowercase Russian alphabet.

    Returns a dict with one entry per letter of the alphabet (zero when no
    word starts with it); empty words and other initial characters are
    ignored.
    """
    alpha = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
    dic = dict.fromkeys(alpha, 0)
    for word in arr:
        initial = word[:1]  # '' for an empty word, which is never a key
        if initial in dic:
            dic[initial] += 1
    return dic
+def count_pos(arr):
+ dic = {key:ind for ind, key in enumerate(arr)}
+ return dic
+def create_antw(dic):
+ f = open('answer_keys2.tsv', 'w', encoding='UTF-8')
+ for key in sorted(dic):
+ f.write('{0}\t{1}\n'.format(key, str(dic[key])))
+ f.close()
+def main():
+ textik = done_text()
+ create_antw(count_pos(textik))
+if __name__ == '__main__':
+ main()
+my_num = 9
+your_num = int(input('Write a number from 1 to 10, please: '))
+if your_num == my_num:
+ print('You\'re lucky one :D')
+else:
+ if your_num > my_num:
+ print('Your number is too big')
+ else:
+ print('Your number is too small')
+ your_num = int(input('Try again: '))
+ if your_num == my_num:
+ print('You\'re lucky one :D')
+ else:
+ print('You\'re hopeless')
+import re
def split_txt():
    """Print ``test1.txt`` on one line with ``^`` after each sentence end.

    Newlines are turned into spaces, then a ``^`` is inserted after ``?``,
    ``!``, ``...`` and after a full stop that follows a lowercase run, a
    space and a word of at least two letters.
    """
    with open('test1.txt', 'r', encoding='UTF-8') as f:
        s = f.read()
    # str.replace returns a new string; the original discarded the result,
    # so the newlines were never actually replaced.
    s = s.replace('\n', ' ')
    s1 = re.sub(r'(\?|!|\.\.\.|([а-яa-z.]+ [а-яa-zА-ЯA-Z]{2,}\.))', r'\1^', s)
    print(s1)
+def main():
+ split_txt()
+main()
+import re
def find_space(fname):
    """Print the ``«Name-<digit>`` fragments found in a UTF-8 file, comma-separated.

    A fragment is an opening ``«``, letters (Latin or Cyrillic), a hyphen
    and one digit. The original split the text into a list before calling
    ``re.findall``, which raises ``TypeError``; the text is now searched as
    a string.

    Args:
        fname: path of the file to scan.
    """
    with open(fname, 'r', encoding='UTF-8') as f:
        s = f.read()
    wlist = re.findall('«[a-zA-ZА-Яа-я]+?-[0-9]', s)
    print(', '.join(wlist))
+def main():
+ find_space('test.txt')
+main()
+coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
+while True:
+ s = input('Введите текст: ')
+ if s == '':
+ break
+ s = s.split()
+ res = ''
+ if s[0] == 'decode':
+ s.pop(0)
+ s = ' '.join(s)
+ for letter in s:
+ if letter == '!':
+ res += ' '
+ else:
+ for indx, i in enumerate(coinc):
+ if i == letter:
+ if i == 'A':
+ res += 'Z'
+ elif i == 'a':
+ res += 'z'
+ elif i == 'А':
+ res += 'Я'
+ elif i == 'а':
+ res += 'я'
+ else:
+ res += coinc[indx - 1]
+ else:
+ if s[0] == 'code':
+ s.pop(0)
+ s = ' '.join(s)
+ for letter in s:
+ if letter == ' ':
+ res += '!'
+ else:
+ for indx, i in enumerate(coinc):
+ if i == letter:
+ if i == 'Z':
+ res += 'A'
+ elif i == 'z':
+ res += 'a'
+ elif i == 'Я':
+ res += 'А'
+ elif i == 'я':
+ res += 'а'
+ else:
+ res += coinc[indx + 1]
+ print (res)
+print('Программа завершила свою работу!')
+words=[]
+check = True
+while check is True:
+ inp = input("Введите слово: ")
+ if inp == "":
+ check = False
+ else:
+ words.append(inp)
+for indx in range(len(words) - 1, -1, -1):
+ print(words[indx])
+check = True
+while check is True:
+ s = input("Введите текст: ")
+ if s == "":
+ check = False
+ else:
+ res = ""
+ for letter in s:
+ if ord(letter) == 90:
+ res += chr(65)
+ elif ord(letter) == 122:
+ res += chr(97)
+ elif ord(letter) == 1071:
+ res += chr(1040)
+ elif ord(letter) == 1103:
+ res += chr(1072)
+ else:
+ res += chr(ord(letter) + 1)
+ print (res)
+print("Программа завершила работу")
+names = ['Оля','Маша','Коля','Костя','Нина','Ира']
+surnames=['Кузнецова', 'Сидорова', 'Семенов', 'Иванов', 'Илларионова']
+if len(names) >= len(surnames):
+ for i in range(len(surnames)):
+ strng = names[i] + ' ' + surnames[i]
+ print(strng)
+ check = len(surnames)
+ while check < len(names):
+ print(names[check])
+ check += 1
+else:
+ for i in range(len(names)):
+ strng = names[i] + ' ' + surnames[i]
+ print(strng)
+ check = len(names)
+ while check < len(surnames):
+ print(surnames[check])
+ check += 1
def doneText(fname):
    """Return the space-separated tokens of a file, lowercased and trimmed.

    The text is split on single spaces only, and the listed punctuation
    characters are stripped from both ends of every token.

    Args:
        fname: path of the file to read (opened with the default encoding).
    """
    # The original referenced ``f.close`` without calling it, so the file
    # was never explicitly closed; the context manager guarantees it.
    with open(fname, 'r') as f:
        s = f.read().split(' ')
    # '\\[' spells out the backslash the original wrote as the invalid escape '\['.
    return [word.lower().strip('.,:;№-*!?/|\\[]{}()\'"') for word in s]
# Ask for a file name and report how many tokens doneText() finds in it.
fnm = input('Введите имя файла: ')
arr = doneText(fnm)
# Lists have no .len() method; the builtin len() gives the word count.
print('Количество слов в тексте = {0}'.format(len(arr)))
+my_num = 9
+check = False
+while (check == False):
+ try:
+ your_num = int(input('Write a number from 1 to 10, please: '))
+ except ValueError:
+ print("It's not a number, I'm out")
+ break
+ if your_num == my_num:
+ print('You\'re lucky one :D')
+ check = True
+ else:
+ if your_num > my_num:
+ print('Your number is too big')
+ else:
+ print('Your number is too small')
+print("End of programme")
+check = True
+while check == True:
+ word = input('Write a word in cyrillic: ')
+ if word == "":
+ check = False
+ print("Empty word, I'm out")
+ else:
+ if word.endswith('о') or word.endswith('н') or word.endswith('р'):
+ print('Possible forms: Nom. Sg. / Acc. Sg.')
+ elif word.endswith('а'):
+ print('Possible forms: Gen. Sg. / Nom. Pl. / Acc. Pl.')
+ elif word.endswith('у'):
+ print('Possible forms: Dat. Sg.')
+ elif word.endswith('ом'):
+ print('Possible forms: Instrum. Sg. / Nom. Sg.')
+ elif word.endswith('е'):
+ print('Possible forms: Prep. Sg.')
+ elif word.endswith('ам'):
+ print('Possible forms: Dat. Pl.')
+ elif word.endswith('ами'):
+ print('Possible forms: Instrum. Pl.')
+ elif word.endswith('ах'):
+ print('Possible forms: Prep. Pl.')
+ elif word.endswith('и'):
+ print('Possible forms: Nom. Pl.')
+ else:
+ print('Possible forms: Gen. Pl.')
+print('Thanks for using!')
+import os
+import re
+def rem_dir(name_dir):
+ for root, dirs, files in os.walk('.', topdown=False):
+ if re.findall(os.sep + name_dir, root):
+ for f in files:
+ os.remove(os.path.join(root, f))
+ for d in dirs:
+ os.rmdir(os.path.join(root, d))
+ for d in dirs:
+ if name_dir == d:
+ os.rmdir(os.path.join(root, d))
+def print_root():
+ s = '--'
+ for root, dirs, files in os.walk('.'):
+ print (s + root)
+ if len(dirs):
+ s = '\t' + s
+ for f in files:
+ print('\t{0}'.format(f))
+def main():
+ rem_dir('wrong')
+ print_root()
+if __name__ == '__main__':
+ main()
+word = input('Write a word in cyrillic: ')
+if word.endswith('о'):
+ print('Possible forms: Nom. Sg. / Acc. Sg.')
+elif word.endswith('а'):
+ print('Possible forms: Gen. Sg. / Nom. Pl. / Acc. Pl.')
+elif word.endswith('у'):
+ print('Possible forms: Dat. Sg.')
+elif word.endswith('ом'):
+ print('Possible forms: Instrum. Sg.')
+elif word.endswith('е'):
+ print('Possible forms: Prep. Sg.')
+elif word.endswith('ам'):
+ print('Possible forms: Dat. Pl.')
+elif word.endswith('ами'):
+ print('Possible forms: Instrum. Pl.')
+elif word.endswith('ах'):
+ print('Possible forms: Prep. Pl.')
+else:
+ print('Possible forms: Gen. Pl.')
+import re
def done_text(fname):
    """Return the words of a UTF-8 file, lowercased and trimmed.

    The text is split on whitespace; punctuation, quotes, angle brackets and
    digits are stripped from both ends of every word (a word made only of
    such characters becomes an empty string).

    Args:
        fname: path of the file to read.
    """
    # The original referenced ``f.close`` without calling it, so the file
    # was never explicitly closed; the context manager guarantees it.
    with open(fname, 'r', encoding='UTF-8') as f:
        s = f.read().split()
    # '\\[' spells out the backslash the original wrote as the invalid escape '\['.
    return [word.lower().strip('.,:;№-*!?/|\\[]{}()\'"1234567890«»”“><') for word in s]
def print_words(s):
    """Print every word of ``s`` that contains at least three of the vowels ауоиыеюя."""
    three_vowels = re.compile('.*[ауоиыеюя].*[ауоиыеюя].*[ауоиыеюя].*')
    for word in filter(three_vowels.search, s):
        print(word)
+def main():
+ textik = done_text(input('Введите имя файла с расширением: '))
+ print_words(textik)
+main()
+import os
+import re
+import math
+from math import log
+PUNCT = '[.,!«»?&@"$\[\]\(\):;%
+def preprocessing(text):
+ text_wo_punct = re.sub(PUNCT, '', text.lower())
+ words = text_wo_punct.strip().split()
+ return words
def count_tf(word, text):
    """Return the term frequency: occurrences of ``word`` divided by ``len(text)``.

    Args:
        word: token to count.
        text: non-empty list of tokens.
    """
    return text.count(word) / len(text)


def count_df(word, texts):
    """Return the number of texts in ``texts`` that contain ``word``."""
    return sum(1 for text in texts if word in text)


def count_idf(word, texts):
    """Return the smoothed ratio ``len(texts) / (1 + document frequency)``.

    The logarithm is applied by the caller (see ``count_tfidf``).
    """
    return len(texts) / (1 + count_df(word, texts))


def count_tfidf(word, text, texts):
    """Return the TF-IDF score ``tf * log10(idf)`` of ``word`` in ``text``.

    The original multiplied ``log10(tf)`` by ``log10(idf)``, which ranked
    the rarest words of a document first; TF-IDF weights the raw term
    frequency by the logarithm of the inverse document frequency.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    return tf * log(idf, 10)


def get_texts():
    """Read up to 50 files per directory under ``wikipedia`` and tokenise them.

    Returns:
        (texts_dic, texts): a dict from the file name up to its first dot to
        the token list, and the list of all token lists.
    """
    texts_dic = {}
    for root, dirs, files in os.walk('wikipedia'):
        for f in files[:50]:
            with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
                text = preprocessing(t.read())
            texts_dic[f.split('.')[0]] = text
    texts = list(texts_dic.values())
    return texts_dic, texts


def fin_output(texts_dic, texts):
    """Print the five highest-scoring TF-IDF words of every document.

    Args:
        texts_dic: dict from document name to its token list.
        texts: list of all token lists (the corpus for the IDF part).
    """
    for text in texts_dic:
        print("Top words in document {}".format(text))
        tokens = texts_dic[text]
        # Score each distinct word once; dict.fromkeys keeps first-seen order
        # so ties are printed deterministically.
        scores = {word: count_tfidf(word, tokens, texts)
                  for word in dict.fromkeys(tokens)}
        # Highest score first: those are the most characteristic words.
        sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        for word, score in sorted_words[:5]:
            print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
+def main():
+ a = get_texts()
+ fin_output(a[0], a[1])
+if __name__ == '__main__':
+ main()
+check = True
+coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
+while check is True:
+ s = input('Введите текст: ')
+ if s == '':
+ check = False
+ else:
+ res = ''
+ for letter in s:
+ if letter == ' ':
+ res += ' '
+ else:
+ for indx, i in enumerate(coinc):
+ if i == letter:
+ if i == 'A':
+ res += 'Z'
+ elif i == 'a':
+ res += 'z'
+ elif i == 'А':
+ res += 'Я'
+ elif i == 'а':
+ res += 'я'
+ else:
+ res += coinc[indx - 1]
+ print (res)
+print('Программа завершила свою работу!')
def done_text(fname):
    """Return the words of a file, lowercased and trimmed.

    The text is split on whitespace; punctuation, quotes, angle brackets and
    digits are stripped from both ends of every word.

    Args:
        fname: path of the file to read (opened with the default encoding).
    """
    # The original referenced ``f.close`` without calling it, so the file
    # was never explicitly closed; the context manager guarantees it.
    with open(fname, 'r') as f:
        s = f.read().split()
    # '\\[' spells out the backslash the original wrote as the invalid escape '\['.
    return [word.lower().strip('.,:;№-*!?/|\\[]{}()\'"1234567890«»”“><') for word in s]
def freq_dic(arr):
    """Return a frequency dictionary: each item of ``arr`` mapped to its count."""
    dic = {}
    for word in arr:
        try:
            dic[word] += 1
        except KeyError:
            dic[word] = 1
    return dic
+def print_dic(dic):
+ for word in dic:
+ if dic[word] >= 10:
+ print(word, dic[word])
+def main():
+ my_text = done_text(input('Введите имя файла с расшриением: '))
+ print_dic(freq_dic(my_text))
+main()
def done_text(fname):
    """Return the words of a file, lowercased and trimmed.

    The text is split on whitespace; punctuation, quotes, angle brackets and
    digits are stripped from both ends of every word.

    Args:
        fname: path of the file to read (opened with the default encoding).
    """
    # The original referenced ``f.close`` without calling it, so the file
    # was never explicitly closed; the context manager guarantees it.
    with open(fname, 'r') as f:
        s = f.read().split()
    # '\\[' spells out the backslash the original wrote as the invalid escape '\['.
    return [word.lower().strip('.,:;№-*!?/|\\[]{}()\'"1234567890«»><') for word in s]
def count_syll(arr, n):
    """Return the words of ``arr`` that contain exactly ``n`` of the vowels аоуыиеёюя."""
    voc = 'аоуыиеёюя'
    return [
        word for word in arr
        if sum(1 for letter in word if letter in voc) == n
    ]
def first_letter(arr, letter):
    """Return the words of ``arr`` that start with ``letter``, in order."""
    return [word for word in arr if word.startswith(letter)]
+def choice():
+ fnm = input('Введите имя файла: ')
+ textik = done_text(fnm)
+ make_choice = input('Если хотите, чтобы программа считала слоги, введите syllables; иначе - letter: ')
+ if make_choice == 'syllables':
+ numb = int(input('Введите количество слогов в словах: '))
+ print(' '.join(count_syll(textik, numb)))
+ else:
+ lett = input('Введите желаемую первую букву: ')
+ print(' '.join(first_letter(textik, lett)))
+def main():
+ choice()
+main()
+import os
def mk_ppk(s):
    """Create nested directories under ``.``, one level per word of ``s``.

    "a b c" creates ./a/b/c; nothing is done if the path already exists.
    """
    target = os.sep.join(['.'] + s.split())
    if not os.path.exists(target):
        os.makedirs(target)
+def mk_fls(num):
+ pth = '.'
+ for ppk in range(num):
+ pth += os.sep + str(ppk+1)
+ if not os.path.exists(pth):
+ os.makedirs(pth)
+ for pp_quant in range(ppk+1):
+ f = open(pth + os.sep + str(pp_quant + 1) + '.txt', 'w')
+ f.close()
+ pth = '.'
def prnt_dir(nm_dir):
    """Print the name of every sub-directory directly inside ``nm_dir``.

    Args:
        nm_dir: directory to list.
    """
    for fl in os.listdir(nm_dir):
        # listdir returns bare names; join with nm_dir so the check does not
        # silently test the current working directory instead.
        if os.path.isdir(os.path.join(nm_dir, fl)):
            print(fl)
+def main():
+ mk_ppk(input('Введите приложение: '))
+ mk_fls(int(input('Введите число: ')))
+ prnt_dir('.')
+if __name__ == '__main__':
+ main()
+my_num = 9
+check = False
+your_num = int(input('Write a number from 1 to 10, please: '))
+while(your_num != my_num):
+ if your_num > my_num:
+ print('Your number is too big')
+ else:
+ print('Your number is too small')
+ try:
+ your_num = int(input('Try again: '))
+ except ValueError:
+ print("Not a number")
+ check = True
+ break
+if check == True:
+ print("See you next time")
+else:
+ print("You're right")
+check = True
+coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
+while check is True:
+ s = input('Введите текст: ')
+ if s == '':
+ check = False
+ else:
+ res = ''
+ for letter in s:
+ if letter == ' ':
+ res += ' '
+ else:
+ for indx, i in enumerate(coinc):
+ if i == letter:
+ if i == 'Z':
+ res += 'A'
+ elif i == 'z':
+ res += 'a'
+ elif i == 'Я':
+ res += 'А'
+ elif i == 'я':
+ res += 'а'
+ else:
+ res += coinc[indx + 1]
+ print (res)
+print('Программа завершила свою работу!')
+f = open('freq_crlf.txt', 'r', encoding='utf-8')
+s = f.read().split('\n')
+f.close()
+for line in s:
+ line = line.split(' | ')
+ if line[1] == 'союз':
+ print(' | '.join(line))
+f = open('freq_crlf.txt', 'r', encoding='utf-8')
+s = f.read().split('\n')
+f.close()
+arr = []
+while True:
+ word = input('Введите слово: ')
+ if word == '':
+ print('Результаты:')
+ break
+ else:
+ arr.append(word)
+for word in arr:
+ check = False
+ for line in s:
+ line = line.split(' | ')
+ if word == line[0]:
+ print(' | '.join(line))
+ check = True
+ if check is False:
+ print(u'{0}: Такого слова в словаре нет.'.format(word))
+print('Завершение работы программы')f = open('freq_crlf.txt', 'r', encoding='utf-8')
+s = f.read().split('\n')
+f.close()
+while True:
+ word = input('Введите слово: ')
+ if word == '':
+ print('Завершение работы программы')
+ break
+ else:
+ check = False
+ for line in s:
+ line = line.split(' | ')
+ if word == line[0]:
+ print(' | '.join(line))
+ check = True
+ if check is False:
+ print('Такого слова в словаре нет.')
+import decimal
+f = open('freq_crlf.txt', 'r', encoding='utf-8')
+s = f.read().split('\n')
+f.close()
+ress = ''
+ipm_sum = 0
+for line in s:
+ line = line.split(' | ')
+ if line[1].find('ед жен') != -1:
+ ress += line[0]
+ ress += ', '
+ ipm_sum += decimal.Decimal(line[2])
+print(ress)
+print(u'Суммарное значение ipm = {0}'.format(ipm_sum))import re
+def open_file():
+ f = open('islandic.xml', 'r', encoding='UTF-8')
+ s = f.read()
+ f.close()
+ return s
+def count_lines():
+ s = open_file()
+ s = s.split('\n')
+ f = open('answer_length.txt', 'w', encoding='UTF-8')
+ f.write(str(len(s)))
+ f.close()
def my_diction(arr):
    """Return a dict mapping each item of ``arr`` to how many times it occurs."""
    dix = {}
    for word in arr:
        dix.setdefault(word, 0)
        dix[word] += 1
    return dix
+def create_diction():
+ s = open_file()
+ regex = ''
+ arr = re.findall(regex, s)
+ dix = my_diction(arr)
+ f = open('answer_keys.txt', 'w', encoding='UTF-8')
+ f.write('Отсортированный список морфологических разборов:\n')
+ for key in sorted(dix):
+ f.write(key + '\n')
+ f.close()
+def count_adj():
+ s = open_file()
+ regex = ''
+ arr = re.findall(regex, s)
+ dix = my_diction(arr)
+ f = open('answer_adj.txt', 'w', encoding='UTF-8')
+ for key in sorted(dix):
+ f.write(key + ' ' + str(dix[key]) + '\n')
+ f.close()
+def create_csv():
+ s = open_file()
+ print(s)
+ regex1 = '(.+?)'
+ regex2 = '<.+?>\n'
+ s = re.sub(regex1, '\\1, \\2, \\3', s)
+ s = re.sub(regex2, '', s)
+ s = re.sub('( )+?', '', s)
+ s = s.split('\n')
+ f = open('answer_dict.csv', 'w', encoding='UTF-8')
+ for line in s:
+ f.write(line + '\n')
+ f.close()
+def main():
+ count_lines()
+ create_diction()
+ count_adj()
+ create_csv()
+if __name__ == '__main__':
+ main()
+check = True
+words = []
+while check is True:
+ s = input("Введите слово: ")
+ if s == "":
+ check = False
+ else:
+ temp = []
+ for letter in s:
+ temp.append(letter)
+ words.append(temp)
+for wrd in words:
+ for letterindx in range(2, len(wrd), 2):
+ if letterindx >= len(wrd):
+ break
+ wrd.pop(letterindx)
+ s = ""
+ for letterindx in range(len(wrd) - 1, -1, -1):
+ s += wrd[letterindx]
+ print(s)f = open('input.txt', 'r', encoding='UTF-8')
+s = f.read().split('\n')
+f.close()
+avgsum = 0
+for indx, line in enumerate(s):
+ s[indx] = line.split()
+ avgsum += len(s[indx])
+print(u'Среднее количество слов в строке = {0}'.format(avgsum / len(s)))import re
+word = input("Введите слово на кириллице: ")
+pattern1 = r'[А-Яа-я]'
+pattern2 = r'[1-9A-Za-z]'
+if re.match(pattern1, word) and re.search(pattern2, word) is None:
+ for indx, letter in enumerate(word):
+ if indx % 2 != 0:
+ if letter != "а" and letter != "к":
+ print(letter)
+else:
+ print("Вводить можно только кириллицу :Р")temp = False
+while (temp == False):
+ try:
+ a = float(input('Введите первое число (a) '))
+ b = float(input('Введите второе число (b) '))
+ c = float(input('Введите третье число (c) '))
+ temp = True
+ except (TypeError, ValueError):
+ print('Просила же только числа вводить!')
+if a + b == c:
+ print('Поздравляю! a + b = c')
+else:
+ print('Прошу прощения, но a + b != c')
+if a*c + b == 0:
+ print('Поздравляю! a*c + b = 0')
+else:
+ print('Прошу прощения, но a*c + b != 0')import random
+def ask_name():
+ return input('Введите имя файла с расширением: ')
def get_words():
    """Load the word/hint table from the file named by ``ask_name()``.

    Each non-blank line has the form ``word,hint1,hint2,...``. Returns a dict
    mapping the word to the list of its hints.

    Blank lines (e.g. the one produced by a trailing newline) are skipped:
    previously they created an entry ``'' -> []`` whose empty hint list made
    ``random.choice`` fail later in the game.
    """
    with open(ask_name(), 'r', encoding='UTF-8') as f:
        lines = f.read().split('\n')
    dic = {}
    for ln in lines:
        if not ln.strip():
            continue
        fields = ln.split(',')
        dic[fields[0]] = fields[1:]
    return dic
+def guess_word(word):
+ num = len(word)
+ if num <= 4:
+ print('У вас {0} попытки'.format(num))
+ else:
+ print('У вас {0} попыток'.format(num))
+ while num > 0:
+ temp = input('Введите слово: ')
+ if temp == word:
+ print('Вы угадали!')
+ break
+ else:
+ print('Попробуйте еще раз!')
+ num -= 1
+ if num == 0:
+ print('Повезет в другой раз!')
+def game(d):
+ num_check = 0
+ for k in d:
+ print('Подсказка! {0} ...'.format(random.choice(d[k])))
+ guess_word(k)
+ num_check += 1
+ if num_check == len(d):
+ print('Это было последнее слово. Приходите еще')
+ break
+ ask = input('Хотите попробовать еще раз? Введите только "да" или "нет": ')
+ if ask == 'нет':
+ break
+def main():
+ d = get_words()
+ game(d)
+main()def done_text(fname):
+ f = open(fname, 'r')
+ s = f.read().split()
+ for indx, word in enumerate(s):
+ s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><')
+ f.close
+ return s
+def get_fname():
+ return input("Введите имя файла с расширением: ")
def count_ing(arr):
    """Return how many words in ``arr`` end with ``'ing'``."""
    return sum(1 for word in arr if word.endswith('ing'))
def count_form(arr, form):
    """Return the number of items of ``arr`` that are equal to ``form``."""
    return sum(1 for word in arr if word == form)
+def main():
+ textik = done_text(get_fname())
+ print('Всего в тексте {0} форм на -ing'.format(count_ing(textik)))
+ form = input('Введите форму, количество вхождений которой хотите найти: ')
+ print('Эта форма встречается {0} раз'.format(count_form(textik, form)))
+main()import re
def search_inf(fname):
    """Find the capital named after the text '>Столица' in a UTF-8 file.

    The first run of Cyrillic letters (optionally hyphenated) that follows
    '>Столица' is printed and written to ``answer.txt``. When nothing matches,
    nothing is printed and no file is created.

    Both files are handled with ``with`` so they are closed on every path.
    """
    with open(fname, 'r', encoding='UTF-8') as f:
        s = f.read()
    regex = '>Столица.*?([А-Яа-я]+(-[А-Яа-я]+)*)'
    # DOTALL lets the lazy gap span line breaks between the label and the name.
    res = re.search(regex, s, re.DOTALL)
    if res:
        with open('answer.txt', 'w', encoding='UTF-8') as k:
            print(res.group(1))
            k.write(res.group(1))
+def main():
+ search_inf(input('Введите имя файла: '))
+if __name__ == '__main__':
+ main()import re
def change_text():
    """Replace forms of 'комар' with forms of 'слон' in ``mosq1.txt``.

    Reads ``mosq1.txt`` (UTF-8), substitutes the stem in both capitalised and
    lower-case spellings when it is followed by one of the listed endings
    (or a space / closing quote), and writes the result to ``antwort.txt``.

    Files are opened with ``with`` so they are closed even on errors.
    """
    with open('mosq1.txt', 'r', encoding='UTF-8') as f:
        s = f.read()
    s1 = re.sub('Комар(»| |а|ы|у|ом|е|ов|ам|ами|ах)', 'Слон\\1', s)
    s1 = re.sub('комар(»| |а|ы|у|ом|е|ов|ам|ами|ах)', 'слон\\1', s1)
    with open('antwort.txt', 'w', encoding='UTF-8') as f:
        f.write(s1)
+def main():
+ change_text()
+if __name__ == '__main__':
+ main()import random
+def generate_adj():
+ f = open('adj.txt', 'r')
+ s = f.read().split()
+ f.close()
+ return random.choice(s)
+def generate_noun(num):
+ if num == 'sg':
+ f_name = 'noun_sg.txt'
+ else:
+ f_name = 'noun_pl.txt'
+ f = open(f_name, 'r')
+ s = f.read().split()
+ f.close()
+ return random.choice(s)
+def generate_verb():
+ f = open('verbs.txt', 'r')
+ s = f.read().split()
+ f.close()
+ return random.choice(s)
def generate_punct(pos):
    """Pick a random punctuation mark for a haiku line.

    ``pos == 'end'`` reads candidates from ``end_punct.txt``; any other value
    reads them from ``mid_punct.txt``. Candidates are whitespace-separated.
    A dash is returned with a leading space so it does not stick to the
    preceding word.

    The file is read inside ``with`` so the handle is always released.
    """
    f_name = 'end_punct.txt' if pos == 'end' else 'mid_punct.txt'
    with open(f_name, 'r') as f:
        candidates = f.read().split()
    punct = random.choice(candidates)
    if punct == '-':
        punct = ' ' + punct
    return punct
+def generate_pronoun():
+ f = open('pronouns.txt', 'r')
+ s = f.read().split()
+ f.close()
+ return random.choice(s)
+def generate_intj():
+ f = open('intj.txt', 'r')
+ s = f.read().split('\n')
+ f.close()
+ return random.choice(s)
+def generate_line(num):
+ if num == 1:
+ return generate_adj() + ' ' + generate_noun('sg') + generate_punct('end') + '\n'
+ elif num == 2:
+ return generate_verb() + ' ' + generate_noun('pl') + ' и' + '\n'
+ else:
+ return generate_pronoun() + generate_punct('mid') + ' ' + generate_intj() + generate_punct('end') + '\n'
+def generate_haiku():
+ return generate_line(1) + generate_line(2) + generate_line(3)
+print(generate_haiku())import re
def done_text(fname):
    """Split a UTF-8 text file into sentences, each a list of lower-case words.

    Punctuation other than sentence terminators, digits and the Latin capitals
    V, I, X (Roman numerals) are replaced by spaces; the text is lower-cased
    and split into sentences on '.', '?' and '!'. Sentences without any word
    are dropped.

    Fixes over the previous version:
    * the clean-up pattern contained an empty alternative, so it matched the
      empty string at every position and inserted spaces between all
      characters instead of removing punctuation;
    * the Roman-numeral letters were matched after lower-casing and therefore
      never matched; the clean-up now runs before lower-casing;
    * empty sentences were removed with ``pop`` while enumerating the same
      list, which skipped the following item and left it as an unsplit string.
    """
    with open(fname, 'r', encoding='UTF-8') as f:
        s = f.read()
    # One character class instead of a long alternation; every metacharacter
    # is escaped explicitly.
    noise = r'[,:№\-*/|\[\]{}\\()\'"0-9«»><VIX]+'
    s = re.sub(noise, ' ', s).lower()
    sentences = (sent.split() for sent in re.split(r'[.?!]', s))
    return [words for words in sentences if words]
def count_letters(arr):
    """Report every word of seven or more characters to ``answer_sheet12.txt``.

    ``arr`` is a list of sentences, each a list of words. For each long word
    one line is written with the 1-based sentence number, the word and its
    length. The output file is managed by ``with`` so it is always closed.
    """
    long_words = [
        (number, word, len(word))
        for number, sentence in enumerate(arr, start=1)
        for word in sentence
        if len(word) >= 7
    ]
    with open('answer_sheet12.txt', 'w', encoding='UTF-8') as f:
        for number, word, length in long_words:
            f.write('предложение {0}, {1}-------{2}\n'.format(number, word, length))
+def main():
+ count_letters(done_text('tolstoy.txt'))
+if __name__ == '__main__':
+ main()import os
+import re
def count_dirs():
    """Return the names of sub-directories of the current directory that
    contain at least one decimal digit, in ``os.listdir`` order."""
    digit = '[0-9]'
    found = []
    for entry in os.listdir('.'):
        if os.path.isdir(entry) and re.search(digit, entry):
            found.append(entry)
    return found
+def print_answer(arr):
+ fout = open('answer_sheet13.txt', 'w', encoding='UTF-8')
+ fout.write('Всего папок с цифрами в названии - {0}.'.format(str(len(arr))))
+ fout.write('Все имена в директории (без повторений):\n')
+ clear_names = []
+ for thing in os.listdir('.'):
+ temp = thing
+ if os.path.isfile(thing):
+ temp = re.sub('\..+', '', thing)
+ if temp not in clear_names:
+ clear_names.append(temp)
+ for nme in clear_names:
+ if nme:
+ fout.write(nme + '\n')
+ fout.close()
+def main():
+ print_answer(count_dirs())
+if __name__ == '__main__':
+ main()import os
def count_dep():
    """Write the maximum directory nesting depth below '.' to
    ``answer_sheet14.txt``.

    Depth is the number of path separators in the directory path reported by
    ``os.walk``, so '.' itself has depth 0.
    """
    deepest = 0
    for root, _dirs, _files in os.walk('.', topdown=False):
        # Number of separators == number of path components minus one.
        depth = root.count(os.sep)
        deepest = max(deepest, depth)
    with open('answer_sheet14.txt', 'w', encoding='UTF-8') as answer:
        answer.write(str(deepest))
+def main():
+ count_dep()
+if __name__ == '__main__':
+ main()while True:
+ s = input("Введите строку: ")
+ if s == "":
+ break
+ for indx, part in enumerate(s):
+ print(s[:len(s) - indx])s = input("Введите строку: ")
+for indx, part in enumerate(s):
+ print(s[:len(s) - indx])import re
+def ask_name():
+ return input('Введите имя файла с расширением: ')
def get_words():
    """Read the file named by ``ask_name()`` and return its normalised words.

    The text is split on whitespace; every token is lower-cased and stripped
    of leading/trailing punctuation, digits and quotes.

    The previous version ended with ``f.close`` (no call parentheses), so the
    file was never closed; a ``with`` block now guarantees it. The strip set
    is unchanged; the backslash is merely written as an explicit escape.
    """
    with open(ask_name(), 'r', encoding='UTF-8') as f:
        tokens = f.read().split()
    junk = '.,:;№-*!?/|\\[]{}()\'"1234567890«»><'
    return [token.lower().strip(junk) for token in tokens]
def count_words(words):
    """Return the distinct forms of the verb 'открыть' found in ``words``.

    A word qualifies when the whole word matches the verb-form pattern.
    Duplicates are dropped; the first-seen order is kept.
    """
    regex = 'откр(ы|о)((т(ый|ая|ое|ые|ого|ой|ых|ому|ым|ую|ом|ою)|в(ш(ий?|ая|ее|ие|его|ей|их|ему|им|ую|ею))?)|(л(а|о|и)?)|(й(те)?)|(ют?|е(шь|м|те?)))(ся|сь)?'
    matching = (word for word in words if re.fullmatch(regex, word))
    # dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(matching))
+def main():
+ print(', '.join(count_words(get_words())))
+main()word = input('Введите слово: ')
+if word == '':
+ print ('Слово не введено')
+word2 = ''
+for i in range(len(word)):
+ for k in range(len(word)):
+ if k + i < len(word):
+ word2 += word[k + i]
+ else:
+ word2 += word[k + i - len(word)]
+ print (word2)
+ word2 = ''
+import re
+def open_and_edit():
+ f = open("verbs.txt", 'r', encoding = "utf-8")
+ s = f.read()
+ f.close()
+ s1 = s.lower()
+ a = s1.split()
+ for i, word in enumerate(a):
+ a[i] = word.strip('.,!?();:*/\|<>-_%&
+ return a
def find_and_print(a):
    """Print each distinct form of 'программировать' found in ``a``.

    A word is accepted when the verb-form pattern matches at its start.
    Every accepted word is printed once, on its own line, in first-seen order.
    """
    pattern = '^программир((у(ю(т|щ(и(й|ми?|е|х)|е(го|му?|й)|ая|ую))?|я|е(шь|те?))|ова(л(а|и)?|ть))(с(я|ь))?|уем(ы(й|ми?|е|х)?|о(го|му?|й)|ая?|ую))'
    unique = dict.fromkeys(word for word in a if re.search(pattern, word))
    for verb in unique:
        print(verb)
+def main():
+ text = open_and_edit()
+ find_and_print(text)
+main()
+import re
+import os
+def task1():
+ for root, dirs, files in os.walk('.\\news'):
+ s = ''
+ for f in files:
+ file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251")
+ text = file.readlines()
+ words = 0
+ for line in text:
+ reg = ''
+ r = re.search(reg,line)
+ if r:
+ words += 1
+ s += f + '\t' + str(words) + '\n'
+ f2 = open("words_in_files.txt", 'w', encoding = "utf-8")
+ f2.write(s)
def task2():
    """Collect author and creation date of every file under ``news``.

    Each file (read as Windows-1251) is scanned line by line for
    ``content="..." name="author"`` and ``content="dd.mm.yyyy" name="created"``;
    the last match of each kind wins. One tab-separated row
    ``file<TAB>author<TAB>date`` per file is written to ``words_in_files.csv``.

    Fixes over the previous version:
    * input files were opened and never closed;
    * the row accumulator was reset for every directory, so rows from earlier
      directories were lost;
    * the start directory was spelled with a Windows-only backslash; it is now
      built with ``os.path.join`` (same path on Windows, valid elsewhere).
    """
    author_re = re.compile('content="([ |(а-яА-яa-zA-Z)]+)" name="author"')
    date_re = re.compile(r'content="([0-9]+\.[0-9]+\.[0-9]+)" name="created"')
    rows = []
    for root, dirs, files in os.walk(os.path.join('.', 'news')):
        for f in files:
            author = ''
            date = ''
            with open(os.path.join(root, f), 'r', encoding="WINDOWS-1251") as file:
                for line in file:
                    r1 = author_re.search(line)
                    if r1:
                        author = r1.group(1)
                    r2 = date_re.search(line)
                    if r2:
                        date = r2.group(1)
            rows.append(f + '\t' + author + '\t' + date + '\n')
    with open("words_in_files.csv", 'w', encoding="utf-8") as f3:
        f3.write(''.join(rows))
+def task3():
+ for root, dirs, files in os.walk('.\\news'):
+ s = ''
+ for f in files:
+ file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251")
+ text = file.readlines()
+ for i, line in enumerate(text):
+ reg_adj = 'A=.+gen.+>?'
+ reg_sumj = 'S,.+gen.+>?'
+ reg_word = '([а-яА-Я]+|`)\n'
+ r1 = re.search(reg_adj, line)
+ if r1:
+ r2 = re.search(reg_word, line)
+ word1 = r2.group(1)
+ r3 = re.search(reg_subj, text[i+1])
+ if r3:
+ word2 = r3.group(1)
+def main():
+ task1()
+ task2()
+main()
+import re
+def open_and_edit():
+ f = open("linguistics.txt", 'r', encoding = "utf-8")
+ s = f.read()
+ f.close()
+ return s
def replace_and_output(s):
    """Replace forms of 'язык' with forms of 'шашлык' and save the result.

    A form is the stem followed by up to three lower-case Cyrillic letters and
    then a space, full stop, comma or closing parenthesis. Lower-case and
    capitalised stems are handled separately. The text is written to
    ``shashlyk.txt`` (UTF-8) and a confirmation message is printed.

    The output file is managed by ``with`` so it is closed even if writing
    fails.
    """
    s1 = re.sub(r'язык([а-я]{,3}( |\.|,|\)))', 'шашлык\\1', s)
    s2 = re.sub(r'Язык([а-я]{,3}( |\.|,|\)))', 'Шашлык\\1', s1)
    with open("shashlyk.txt", 'w', encoding="utf-8") as f:
        f.write(s2)
        print('Текст записан в файл shashlyk.txt')
+def main():
+ text = open_and_edit()
+ replace_and_output(text)
+main()
+import os
+import re
def count_folders():
    """Print how many sub-directories of '.' have names made only of
    Cyrillic letters (а-я, А-Я) and spaces."""
    cyrillic_only = '^([а-яА-Я]| )+$'
    result = sum(
        1
        for entry in os.listdir('.')
        if os.path.isdir(entry) and re.search(cyrillic_only, entry)
    )
    print('Найдено папок:', result)
def print_names():
    """Print the distinct names found in the current directory, sorted.

    Directories contribute their full name. Files contribute their name with
    the final lower-case-letter extension removed; files without such an
    extension are not listed.
    """
    file_name = r'^(.+)(\.[a-z]+)$'
    seen = set()
    for entry in os.listdir('.'):
        if os.path.isdir(entry):
            seen.add(entry)
        if os.path.isfile(entry):
            parsed = re.search(file_name, entry)
            if parsed:
                seen.add(parsed.group(1))
    for name in sorted(seen):
        print(name)
+def main():
+ count_folders()
+ print_names()
+main()
+def read_file():
+ f = open("words.csv", 'r', encoding = "utf-8")
+ a = f.readlines()
+ f.close()
+ return(a)
def make_dict(a):
    """Build a lookup from the lines of a ';'-separated word list.

    Each line is expected to look like ``first;second[;...]``. Fields are
    stripped of surrounding whitespace and the result maps the second field
    to the first one.

    Lines with fewer than two fields (for instance blank lines) are skipped;
    previously they raised ``IndexError``.
    """
    words = {}
    for line in a:
        fields = [part.strip() for part in line.split(';')]
        if len(fields) < 2:
            continue
        words[fields[1]] = fields[0]
    return words
+def guess(dic):
+ for noun in dic:
+ print(dic[noun], '...')
+ attempt = 0
+ while attempt != len(dic[noun]):
+ print('Осталось попыток: ', len(dic[noun]) - attempt )
+ attempt += 1
+ if input() == noun:
+ print('Маладэц!')
+ attempt = len(dic[noun])
+ elif len(dic[noun]) - attempt == 0:
+ print('Не угадал :(')
+def main():
+ text = read_file()
+ words = make_dict(text)
+ print(words)
+ guess(words)
+main()
+import re
+def open_and_edit():
+ f = open("hse.html", 'r', encoding = "utf-8")
+ s = f.read()
+ f.close()
+ return s
+def find_and_print(s):
+ reg1 = 'Преподаватели.*?\n.*?\n[0-9]+ ?[0-9]+'
+ reg2 = 'Преподаватели.*?\n.*?\n '
+ res1 = re.findall(reg1,s)
+ res2 = re.findall(reg2,s)
+ number = res1[0].replace(res2[0], '')
+ print('Число преподавателей:',number)
+ f = open("found_number.txt", 'w', encoding = "utf-8")
+ f.write(number)
+ f.close()
+def main():
+ text = open_and_edit()
+ find_and_print(text)
+main()
+words = []
+while True:
+ newword = input('Введите слово: ')
+ if newword == '':
+ break
+ else:
+ words.append(newword)
+for i in range(len(words)):
+ string = words[i]
+ if (i+1) >= len(string):
+ print('В этом слове не осталось символов')
+ else:
+ print(string[i+1:])
+a = input ('Введите число a: ')
+b = input ('Введите число b: ')
+c = input ('Введите число c: ')
+a = int (a)
+b = int (b)
+c = int (c)
+if a % b == c:
+ print ('a дает остаток c при делении на b')
+else:
+ print ('a не дает остаток c при делении на b')
+if a * c + b == 0:
+ print ('c является решением линейного уравнения ax + b = 0')
+else:
+ print ('c не является решением линейного уравнения ax + b = 0')
+import random
+
+def open_file():
+ f = open("words.txt", 'r', encoding = "utf-8")
+ text = f.readlines()
+ f.close()
+ return(text)
+
+def find_words(word,text):
+ for i in range(len(text)):
+ line = []
+ line = text[i].split()
+ for l, w in enumerate(line):
+ line[l] = w.strip('.,!?();:*/\|<>-_%&
+ if line[0] == word:
+ words = []
+ for j in range(len(line)):
+ if j > 0:
+ words.append(line[j])
+ return(words)
+
+def noun():
+ find = 'существительное'
+ nouns = find_words(find, text)
+ return random.choice(nouns)
+
+def imperative():
+ find = 'императив'
+ imper = find_words(find, text)
+ return random.choice(imper)
+
+def adverb(imp):
+ find = 'наречие'
+ adverbs = find_words(find, text)
+ return random.choice(adverbs) + ' ' + imp
+
+def verb():
+ find = 'глагол'
+ verbs = find_words(find, text)
+ return random.choice(verbs)
+
+def adjective():
+ find = 'прилагательное'
+ adj = find_words(find, text)
+ return random.choice(adj)
+
+def question_word():
+ find = 'вопрос'
+ quest = find_words(find, text)
+ return random.choice(quest)
+
+def pos_sentence():
+ sentence = adjective() + ' ' + noun() + ' ' + verb() +\
+ ' ' + adjective() + ' ' + noun() + '.'
+ sentence = sentence.capitalize()
+ return(sentence)
+
+def neg_sentence():
+ sentence = adjective() + ' ' + noun() + ' не ' + verb() +\
+ ' ' + adjective() + ' ' + noun() + '.'
+ sentence = sentence.capitalize()
+ return(sentence)
+
+def quest_sentence():
+ sentence = question_word()+ ' ' + adjective() + ' ' + noun() +\
+ ' ' + verb() + ' ' + adjective() + ' ' + noun() + '?'
+ sentence = sentence.capitalize()
+ return(sentence)
+
+def imper_sentence():
+ sentence = adverb(imperative()) + ' ' + noun() + '!'
+ sentence = sentence.capitalize()
+ return(sentence)
+
+def if_sentence():
+ sentence = 'если бы ' + noun() + ' ' + verb() + ' ' + noun() +\
+ ', то ' + noun() + ' ' + verb() + ' бы ' + noun() + '.'
+ sentence = sentence.capitalize()
+ return(sentence)
+
+def random_print():
+ spisok = [pos_sentence(), neg_sentence(), quest_sentence(),\
+ imper_sentence(), if_sentence()]
+ random.shuffle(spisok)
+ for i in range(len(spisok)):
+ print(spisok[i], end = ' ')
+text = open_file()
+random_print()
+word = input ('Введите слово: ')
+indx = len(word)-1
+while indx >= 0:
+ if (word[indx]!= 'я') & (word[indx]!= 'з') :
+ print (word[indx])
+ indx -= 1
+import os
+import re
def extensions():
    """Print the most frequent file extension in the tree rooted at '.'.

    An extension is a final dot followed by ASCII letters/digits. Files
    without such an extension are ignored (previously the first one raised
    ``IndexError``). On a tie the extension seen first wins; when no file has
    an extension an empty line is printed.
    """
    ext_count = {}
    for root, dirs, files in os.walk('.'):
        for file in files:
            found = re.search(r'\.[a-z0-9A-Z]+$', file)
            if found is None:
                continue
            ext = found.group(0)
            ext_count[ext] = ext_count.get(ext, 0) + 1
    numb = 0
    found_ext = ''
    for ext, count in ext_count.items():
        # Strict '>' keeps the first extension encountered when counts tie.
        if count > numb:
            numb = count
            found_ext = ext
    print(found_ext)
+def main():
+ extensions()
+main()
+import re
+def open_and_read():
+ f = open("animal_farm.txt", 'r', encoding = "utf-8")
+ s = f.read()
+ f.close()
+ return s
def read_sentences(s):
    """Split ``s`` into sentences.

    A sentence ends at '.', '!' or '?' that directly follows a lower-case
    ASCII letter; the terminator stays with its sentence. The returned list
    ends with the (possibly empty) remainder after the last terminator.

    The previous substitution matched the letter before the terminator but
    did not put it back, so every sentence lost its last letter.
    """
    marked = re.sub(r'([a-z])(\.|!|\?)', r'\1\2@@', s)
    return marked.split('@@')
+def split_and_count(a):
+ for i in range(len(a)):
+ words = a[i].split()
+ words2 = [words[j].strip('.,!?();:*/\|<>-_%&
+ for word in range(len(words2)):
+ print('%s_%s' %(words2[word], len(words2[word])))
+def main():
+ text = open_and_read()
+ sent = read_sentences(text)
+ split_and_count(sent)
+main()
+f = open("1.txt", 'r', encoding = "utf-8")
+word1 = 0
+word3 = 0
+for line in f:
+ words = line.split()
+ for i in range(len(words)):
+ if len(words[i]) == 3:
+ word3 += 1
+ elif len(words[i]) == 1:
+ word1 += 1
+ words = []
+if word1 == 0:
+ print("Нет слов длинны 1")
+else:
+ print(float(word3)/float(word1))
+def open_and_edit():
+ name = input('Введите название файла: ')
+ f = open(name, 'r', encoding = "utf-8")
+ s = f.read()
+ f.close()
+ s1 = s.lower()
+ a = s1.split()
+ for i, word in enumerate(a):
+ a[i] = word.strip('.,!?();:*/\|<>-_%&
+ return a
def find_in_text(t):
    """Return every word of ``t`` ending in 'hood' (duplicates kept) and
    print how many were found."""
    hood = [word for word in t if word.endswith('hood')]
    print('В тексте нашлось ', len(hood), ' существительных с суффиксом -hood')
    return hood
def short_list(arr):
    """Return a flat frequency list ``[word1, count1, word2, count2, ...]``.

    Words appear in first-seen order; falsy entries (e.g. empty strings) are
    ignored. ``arr`` itself is not modified.

    The previous pairwise scan stopped one element early, so a word that
    occurred only as the last element was never reported (and a one-word list
    produced an empty result). Counting with a dict fixes that and removes
    the quadratic scan.
    """
    counts = {}
    for word in arr:
        if word:
            counts[word] = counts.get(word, 0) + 1
    short = []
    for word, amount in counts.items():
        short.append(word)
        short.append(amount)
    return short
+def min_freq(arr):
+ short = short_list(arr)
+ min = short[1]
+ index = 1
+ for k in range(1, len(short), 2):
+ if short[k] < min:
+ index = k
+ min = short[k]
+ print('Минимальную частотность имеет существительное', short[index-1])
+def print_nouns(arr):
+ nouns = []
+ short = short_list(arr)
+ for word in short:
+ if type(word) != int:
+ nouns.append(word.replace('hood', ''))
+ all_nouns = ', '.join(nouns)
+ print('Найденный слова образованы от существительных ', all_nouns)
+def main():
+ text = open_and_edit()
+ found = find_in_text(text)
+ min_freq(found)
+ print_nouns(found)
+main()
+f = open("aphor.txt", 'r', encoding = "utf-8")
+a = f.readlines()
+f.close()
+
+for i in range(len(a)):
+ words = []
+ words = a[i].split()
+ numb = 0
+ for j in range(len(words)):
+ if words[j] != '—':
+ numb += 1
+ if numb < 16:
+ print(a[i])
+
+author = []
+um = 0
+for i in range(len(a)):
+ words = []
+ words = a[i].split()
+ for l, word in enumerate(words):
+ words[l] = word.strip('.,!?();:*/\|<>-_%&
+ for j in range(len(words)):
+ if words[j] == 'ум':
+ um += 1
+ povtor = 0
+ for k in range(len(author)):
+ if author[k] == words[len(words)-1]:
+ povtor += 1
+ if povtor == 0:
+ author.append(words[len(words)-1])
+out = ''
+out = ', '.join(author)
+print('Количество цитат = ', um)
+print('Источники: ', out)
+
+inp_words = []
+while True:
+ newword = input('Введите слово: ')
+ if newword == '':
+ break
+ else:
+ inp_words.append(newword)
+for j in range(len(inp_words)):
+ found = 0
+ print(inp_words[j])
+ for i in range(len(a)):
+ words = []
+ words = a[i].split()
+ for l, word in enumerate(words):
+ words[l] = word.strip('.,!?();:*/\|<>-_%&
+ for k in range(len(words)):
+ if inp_words[j] == words[k]:
+ print(a[i])
+ found += 1
+ break
+ if found == 0:
+ print('Цитата с этим словом не найдена')
+import re
+def open_file():
+ f = open("file.txt", 'r', encoding = "utf-8")
+ a = []
+ for line in f:
+ a.append(line)
+ f.close()
+ return a
+def open_new_file():
+ f = open("created_file.txt", 'w', encoding = "utf-8")
+ return f
+def write_lines_number(a,f):
+ i = 0
+ for line in a:
+ i += 1
+ f.write(str(i))
+ f.write('\n')
+def create_dictionary(a,f):
+ dic = {}
+ for line in a:
+ if '= 5:\n'))
+ while n < 5:
+ n = int(input('Введённое число < 5. Пожалуйста, введите число >= 5:\n'))
+ return n
+def func3(dic, n):
+ for i in range(n):
+ key = random.choice(list(dic.keys()))
+ m = 3
+ print(i + 1, '-ое слово. ', 'Подсказка: ', random.choice(dic[key]), ' ...', sep = '')
+ fl = 0
+ while fl != 1 and m != 0:
+ print('Попыток осталось: ', m, sep = '')
+ if input('Введите ниже ваш ответ:\n').lower() == key:
+ fl = 1
+ print('Молодец! Всё верно!')
+ else:
+ print('Неверно. ', end = '')
+ if m != 1:
+ print('Ещё одна подсказка: ', random.choice(dic[key]), ' ...', sep = '')
+ m -= 1
+ if fl == 0:
+ print('Вы не угадали. Правильный ответ: ', key, sep = '')
+def main():
+ dic = func1(input('Введите, пожалуйста, название файла:\n'))
+ n = func2()
+ func3(dic, n)
+main()
+
+
+
+def func1(name):
+
+
+
+
+ f = open(name, 'r', encoding = 'utf-8', errors = 'ignore')
+ words = f.read().replace('\n', ' ').split()
+ f.close()
+ for i, word in enumerate(words):
+ words[i] = word.lower().strip('.”“,/1234567890@
+ return words
+
def func3(words, word):
    """Print the frequency of ``word`` in ``words`` and remove it from the list.

    All occurrences of ``word`` are removed from ``words`` in place; the same
    list object is returned.
    """
    num = words.count(word)
    # Slice assignment keeps the caller's list object, as repeated pop() did.
    words[:] = [item for item in words if item != word]
    print(word, ': frequency = ', num, sep = '')
    return words
+def func2(words):
+ prefix = 'omni'
+
+ length = len(prefix)
+ for word in words:
+ if word.startswith(prefix) and length < len(word):
+ words = func3(words, word)
+ words = func3(words, word[length:])
+ print('-------------------------------------')
+def main():
+ func2(func1('file.txt'))
+main()
+
+
+
+
+def func1(name):
+
+
+
+
+ f = open(name, 'r', encoding = 'utf-8', errors = 'ignore')
+ words = f.read().replace('\n', ' ').split()
+ f.close()
+ for i, word in enumerate(words):
+ words[i] = word.lower().strip('.”“,/1234567890@
+ return words
+
def func3(words, word):
    """Print how many items of ``words`` are equal to ``word``."""
    num = sum(1 for elem in words if elem == word)
    print(word, ': frequency = ', num, sep = '')
+def func2(words):
+ prefix = 'under'
+
+ length = len(prefix)
+ l = []
+ for word in words:
+ if word.startswith(prefix) and length < len(word) and word not in l:
+ func3(words, word)
+ func3(words, word[length:])
+ print('-------------------------------------')
+ l.append(word)
+def main():
+ func2(func1('file.txt'))
+main()
+
+
+import os
def files_and_folders():
    """Inventory the current directory.

    Returns ``(d_files, folders)``: ``d_files`` maps each file name without
    its extension to the number of files sharing that name; ``folders`` lists
    every entry that is not a regular file, in ``os.listdir`` order.
    """
    d_files = {}
    folders = []
    for entry in os.listdir('.'):
        if not os.path.isfile(entry):
            folders.append(entry)
            continue
        stem = os.path.splitext(entry)[0]
        d_files[stem] = d_files.get(stem, 0) + 1
    return d_files, folders
def counting(d_files):
    """Return the total count of files whose name contains punctuation.

    ``d_files`` maps a file name to the number of files with that name; the
    counts of all names containing at least one punctuation mark are summed.
    """
    punct_marks = '.!?:;,-()"\'<>'
    return sum(
        amount
        for name, amount in d_files.items()
        if any(mark in name for mark in punct_marks)
    )
+def output(num, d_files, d_folders):
+
+ print('Количество файлов, названия которых содержит знаки препинания = ', num)
+ print('Названия файлов и папок в данной папке следующие:')
+ i = 1
+ for key in d_files:
+ print('%s) %s' % (str(i), str(key)))
+ i += 1
+ for key in d_folders:
+ if key not in d_files:
+ print('{}) {}'.format(str(i), str(key)))
+ i += 1
+def main():
+ d_files, folders = files_and_folders()
+ num = counting(d_files)
+ output(num, d_files, folders)
+if __name__ == '__main__':
+ main()
+
+import os
def walking():
    """Count the directories under '.' (inclusive) that contain at least two
    files with the same extension."""
    num = 0
    for root, dirs, files in os.walk('.'):
        suffixes = [os.path.splitext(name)[1] for name in files]
        # A repeated extension makes the set smaller than the list.
        if len(set(suffixes)) < len(suffixes):
            num += 1
    return num
+def main():
+ num = walking()
+ print('Количество папок, в которых встречаются несколько файлов с одним\
+и тем же расширением = {}.'.format(num))
+if __name__ == '__main__':
+ main()
+
+import os
def walking():
    """Count the directories under '.' (inclusive) that contain at least two
    files with the same extension.

    For every such directory the first file whose extension repeats an
    earlier one is printed.

    The previous version incremented the counter when *no* repeated extension
    was found, i.e. it counted the opposite of what the caller reports.
    """
    num = 0
    for root, dirs, files in os.walk('.'):
        seen_ext = set()
        flag = False
        for file in files:
            file_ext = os.path.splitext(file)[1]
            if file_ext in seen_ext:
                flag = True
                print(file)
                break
            seen_ext.add(file_ext)
        if flag:
            num += 1
    return num
+def main():
+ num = walking()
+ print('Количество папок, в которых встречаются несколько файлов с одним\
+и тем же расширением = {}.'.format(num))
+if __name__ == '__main__':
+ main()
+import re
+def reading(name):
+ f = open(name, 'r', encoding = 'utf-8')
+ words = f.read().split(' ')
+ f.close()
+ return words
+def cleaning(words):
+ for i, word in enumerate(words) :
+ words[i] = word.lower().strip('.,/1234567890@
+ return words
def printing(words):
    """Print every distinct word containing 'кот', once each, in the order
    of first appearance."""
    shown = []
    for word in words:
        if word in shown or not re.search('кот', word):
            continue
        shown.append(word)
        print(word)
+def main():
+ words = reading(input('Введите, пожалуйста, название файла:\n'))
+ words = cleaning(words)
+ printing(words)
+if __name__ == '__main__':
+ main()
+
+n = float(input('Введите любое число\n'))
+print('число | ', n)
+for i in range(9 + len(str(round(n * 10, 3)))) :
+ print('-', end = '')
+print()
+for i in range(1,11) :
+ if i != 10 :
+ print(i, ' | ', round(i * n, 3), end = '\n')
+ else :
+ print(i, ' | ', round(i * n, 3), end = '\n')
+
+n = float(input('Введите любое число\n'))
+for i in range(1,11) :
+ print(i, '*', n, '=', i * n, end = '\n')
+import re
+def reading(name):
+ f = open(name, 'r', encoding = 'utf-8')
+ lines = f.readlines()
+ f.close()
+ return lines
+def array(lines):
+ text = ''.join(lines)
+ text = re.sub('((.|\n)*)', '\\1', text)
+ text = re.sub('<[wc](.*?)>(.*?)[wc]>', '\\1 \\2', text)
+ print(text)
+ l = re.findall('lemma="(.*?)" type="(.*?)" (.*)', text)
+ return l
+def recording1(d, n):
+ f = open(input('Введите, пожалуйста, название выходного файла\n'), 'a', encoding = 'utf-8')
+ f.write(str(n) + '\n')
+ for key in d.keys():
+ f.write(key + '\n')
+ f.close()
+def recording2(d):
+ f = open(input('Введите, пожалуйста, название выходного файла\n'), 'a', encoding = 'utf-8')
+
+ for key, value in d.items():
+ if re.search('l.f.*', key):
+ f.write(key + ' - ' + str(value) + '\n')
+ f.close()
+def recording3(l):
+ name = input('Введите, пожалуйста, название выходного файла в формате csv\n')
+
+ while not name.endswith('.csv'):
+ name = input('Введите, пожалуйста, название выходного файла в формате csv\n')
+ f = open(name, 'a', encoding = 'utf-8')
+ for i, elem in enumerate(l):
+ f.write(elem[0] + ',' + elem[1] + ',' + elem[2] + '\n')
+ f.close()
def dictionary(lines):
    """Count the ``type="..."`` values of lines that carry a
    ``lemma="..." type="..."`` pair.

    Returns a dict mapping each captured type string to the number of lines
    on which it was found. Lines without the pair are ignored.
    """
    pattern = 'lemma=".*" type="(.*)"'
    d = {}
    for line in lines:
        found = re.search(pattern, line)
        if found is None:
            continue
        kind = found.group(1)
        d[kind] = d.get(kind, 0) + 1
    return d
+def main():
+ name = input('Введите, пожалуйста, название входного файла\n')
+ lines = reading(name)
+ n = len(lines)
+ d = dictionary(lines)
+ recording1(d, n)
+ recording2(d)
+ l = array(lines)
+ recording3(l)
+if __name__== '__main__':
+ main()
+import random
+
+def noun() :
+ f = open('nouns.txt', 'r', encoding = 'utf-8')
+ nouns = f.read().split()
+ f.close()
+ return random.choice(nouns)
+
+def personal_pronoun() :
+ f = open('personal_pronouns.txt', 'r', encoding = 'utf-8')
+ pronouns = f.read().split()
+ f.close()
+ return random.choice(pronouns)
+
+def adjective_before_noun() :
+ f = open('adjectives_before_noun.txt', 'r', encoding = 'utf-8')
+ adj = f.read().split()
+ f.close()
+ return random.choice(adj)
+
+def adjective_after_noun() :
+ f = open('adjectives_after_noun.txt', 'r', encoding = 'utf-8')
+ adj = f.read().split()
+ f.close()
+ return random.choice(adj)
+
+def adverb() :
+ f = open('adverbs.txt', 'r', encoding = 'utf-8')
+ adverbs = f.read().split()
+ f.close()
+ return random.choice(adverbs)
+
+def intensifier(adv):
+ f = open('intensifiers.txt', 'r', encoding = 'utf-8')
+ intensifiers = f.read().split()
+ f.close()
+ return random.choice(intensifiers) + ' ' + adv
+
+def transitive_infinitive() :
+ f = open('transitive_infinitives.txt', 'r', encoding = 'utf-8')
+ inf = f.read().split()
+ f.close()
+ return random.choice(inf)
+
+def intransitive_infinitive() :
+ f = open('intransitive_infinitives.txt', 'r', encoding = 'utf-8')
+ inf = f.read().split()
+ f.close()
+ return random.choice(inf)
+
+def temporary_marker() :
+ f = open('temporary_markers.txt', 'r', encoding = 'utf-8')
+ temporary_markers = f.read().split()
+ f.close()
+ return random.choice(temporary_markers)
+
+def interrogative() :
+ f = open('interrogatives.txt', 'r', encoding = 'utf-8')
+ interrogatives = f.read().split()
+ f.close()
+ return random.choice(interrogatives)
+
+def number() :
+ f = open('numbers.txt', 'r', encoding = 'utf-8')
+ numbers = f.read().split()
+ f.close()
+ return random.choice(numbers)
+
+
+def declension(noun, adjective, number) :
+ f = open('declension_of_nouns.txt', 'r', encoding = 'utf-8')
+ g = open('declension_of_adjectives.txt', 'r', encoding = 'utf-8')
+ nouns = dict()
+ adjectives = dict()
+ for line in f.readlines() :
+ s = line.split(' ', maxsplit = 1)
+ nouns[s[0]] = s[1].split()
+ for line in g.readlines() :
+ s = line.split(' ', maxsplit = 1)
+ adjectives[s[0]] = s[1].split()
+ f.close()
+ g.close()
+ if nouns[noun][0] == 'm' and number == 'sg' :
+ return noun, adjective, random.choice(['le', 'un'])
+ elif nouns[noun][0] == 'm' and number == 'pl' :
+ return nouns[noun][1], adjectives[adjective][1], random.choice(['les', 'des'])
+ elif nouns[noun][0] == 'f' and number == 'sg' :
+ return noun, adjectives[adjective][0], random.choice(['la', 'une'])
+ elif nouns[noun][0] == 'f' and number == 'pl' :
+ return nouns[noun][1], adjectives[adjective][2], random.choice(['les', 'des'])
+
def collocation_bef(noun, adj_before_noun, article) :
    """Build 'article adjective noun' (adjective placed before the noun)."""
    return ' '.join((article, adj_before_noun, noun))
+
def collocation_aft(noun, adj_after_noun, article) :
    """Build 'article noun adjective' (adjective placed after the noun)."""
    return ' '.join((article, noun, adj_after_noun))
+
def conjugation(pronoun, infinitive) :
    """Return the form of ``infinitive`` that agrees with ``pronoun``.

    Forms are looked up in ``conjugations.txt`` (UTF-8), one verb per line:
    the infinitive followed by its six forms (je, tu, il/elle, nous, vous and
    the remaining pronouns), separated by whitespace.

    Raises ``KeyError`` when the infinitive is not in the table and
    ``IndexError`` when its line has too few forms.

    The table is read inside ``with`` so the handle is always released, and
    blank lines are skipped instead of raising ``IndexError``.
    """
    verbs = dict()
    with open('conjugations.txt', 'r', encoding = 'utf-8') as f:
        for line in f:
            parts = line.split()
            if parts:
                verbs[parts[0]] = parts[1:]
    # Column of each pronoun; anything not listed uses the sixth column.
    column = {'je': 0, 'tu': 1, 'il': 2, 'elle': 2, 'nous': 3, 'vous': 4}
    return verbs[infinitive][column.get(pronoun, 5)]
+
def affirmative_sequence(pronoun, verb) :
    """Join a subject pronoun and a verb, eliding 'je' to "j'" when the verb
    starts with a vowel (or 'y')."""
    vowels = 'aàâeéèêiîoôuùûy'
    elide = verb[0] in vowels and pronoun == 'je'
    return "j'" + verb if elide else pronoun + ' ' + verb
+
def interrogative_sequence(pronoun, verb) :
    """Build the inverted 'verb-pronoun' question form, inserting '-t-' when
    the verb ends in a vowel and the pronoun starts with one."""
    vowels = 'aàâeéèêiîoôuùûy'
    link = '-t-' if verb[-1] in vowels and pronoun[0] in vowels else '-'
    return verb + link + pronoun
+
+def affirmative_sentence() :
+ pron = personal_pronoun()
+ noun1, adj1, art1 = declension(noun(), adjective_before_noun(), 'sg')
+ noun2, adj2, art2 = declension(noun(), adjective_before_noun(), 'pl')
+ return affirmative_sequence(pron, conjugation(pron, transitive_infinitive())) + ' ' + collocation_bef(noun1, adj1, art1) + ' et ' + number() + ' ' + adj2 + ' ' + noun2 + '.'
+
+def interrogative_sentence() :
+ pron = personal_pronoun()
+ return interrogative() + ' ' + interrogative_sequence(pron, conjugation(pron, intransitive_infinitive())) + ' ' + temporary_marker() + '?'
+
+def negative_sentence() :
+ noun1, adj1, art1 = declension(noun(), adjective_before_noun(), 'pl')
+ noun2, adj2, art2 = declension(noun(), adjective_before_noun(), 'sg')
+ return collocation_aft(noun1, adj1, art1) + ' ne ' + conjugation('elle', transitive_infinitive()) + ' pas ' + collocation_bef(noun2, adj2, art2) + ' ' + temporary_marker() + ' ' + intensifier(adverb()) + '.'
+
+def conditional_sentence() :
+ return '[Здесь должно быть условное предложение, но я пока не представляю, как оно устроено во французском :( ].'
+
+def imperative_sentence() :
+ return 'ne ' + conjugation('vous', intransitive_infinitive()) + ' pas' +'!'
+
+def random_sentence(n) :
+ if n == 1 :
+ return affirmative_sentence()
+ elif n == 2 :
+ return interrogative_sentence()
+ elif n == 3 :
+ return negative_sentence()
+ elif n == 4 :
+ return conditional_sentence()
+ else :
+ return imperative_sentence()
+
+def text_print() :
+ a = set('12345')
+ for n in a :
+ print(random_sentence(int(n)).capitalize(), end = ' ')
+
+text_print()
+
+import re
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+def reading(name):
+ f = open(name, 'r', encoding = 'utf-8')
+ words = f.read().replace('\n', ' ').split()
+ f.close()
+ return words
+
+def cleaning(words):
+ for i, word in enumerate(words) :
+ words[i] = word.lower().strip('.,/1234567890@
+ return words
+
def printing(words):
    """Print each word of *words* in which the 'загру…' verb-form pattern is found."""
    pattern = re.compile('загру(з(ят(ся)?|и(шь(ся)?|(сь)?|м(ся)?|л((ся)?|а(сь)?|и(сь)?|о(сь)?)|т((ся)?|е(сь)?|ь(ся)?)|в(ш(ую(ся)?|ая(ся)?|е(го(ся)?|му?(ся)?|й(ся)?|е(ся)?|ю(ся)?)|и((сь)?|й(ся)?|м(и)?(ся)?|е(ся)?|х(ся)?)))?))|ж(у(сь)?|ен(а|о|ы)?|ён|(е|ё)нн(ая|ую|о(м(у)?|ю|е|го|й)|ы(м(и)?|й|е|х))))$')
    for candidate in words:
        if pattern.search(candidate):
            print(candidate)
+def main():
+ words = cleaning(reading(input('Введите, пожалуйста, название файла:\n')))
+ printing(words)
+main()
+
+import re
def reading(name):
    """Return the full contents of the UTF-8 text file *name*."""
    # The context manager closes the file even if reading fails.
    with open(name, 'r', encoding='utf-8') as source:
        return source.read()
+def find(text):
+ r = re.search('\= 0 :
+ bigw += 1
+ else :
+ j = 0
+ while j < len(l[i]) and alph.find(l[i][j]) == -1 :
+ j += 1
+ if j == len(l[i]) :
+ allw -= 1
+ elif alphUP.find(l[i][j]) >= 0 :
+ bigw += 1
+if allw != 0 :
+ print('The percentage of words, which start with uppercase equals to ', round(bigw / allw * 100, 3), '%', sep = '')
+else :
+ if fl == 0 :
+ print('There are no words at all! Try to use another file.')
+ else :
+ print('There are some symbols, but no words in Russian. Try to use another file!')
+f.close()
+
+
+
# Keep asking until the user types exactly one word (spaces around it are ignored),
# then print every suffix of that word, longest first.
while True:
    # Only the space character separates words; empty pieces come from runs of spaces.
    pieces = [piece for piece in input('Please input one word:\n').split(' ') if piece]
    if not pieces:
        # An empty answer used to be accepted silently; reject it like an all-spaces answer.
        print("You didn't type any word! Please try again!")
    elif len(pieces) > 1:
        print('There is more than one word. Please try again!')
    else:
        word = pieces[0]
        break
for i in range(len(word)):
    print(word[i:])
+
# Ask for a single word until one is supplied, then print all of its suffixes.
while True:
    word = input('Please input one word:\n').strip()
    if ' ' in word:
        print('There is more than one word. Please try again!')
    elif word == '':
        print("You didn't type any word! Please try again")
    else:
        break
for start in range(len(word)):
    print(word[start:])
+
+
+
# Read three floats and report whether both a % b and a / b equal c.
a = float(input('enter the first number\n'))
b = float(input('enter the second number\n'))
c = float(input('enter the third number\n'))
if b == 0.:
    print('you can\'t divide by zero')
else:
    both_equal = a % b == c and a / b == c
    print('YES' if both_equal else 'NO')
+
+
# Read three floats and report separately whether a % b and a / b equal c.
a = float(input('enter the first number\n'))
b = float(input('enter the second number\n'))
c = float(input('enter the third number\n'))
if b == 0.:
    print('you can\'t divide by zero')
else:
    print('YES, a % b == c' if a % b == c else 'NO, a % b != c')
    print('YES, a / b == c' if a / b == c else 'NO, a / b != c')
+
+
# Read three integers and report whether both a % b and a / b equal c.
a = int(input('enter the first number\n'))
b = int(input('enter the second number\n'))
c = int(input('enter the third number\n'))
if b == 0:
    print('you can\'t divide by zero')
else:
    both_equal = a % b == c and a / b == c
    print('YES' if both_equal else 'NO')
+
+
# Read three integers and report separately whether a % b and a / b equal c.
a = int(input('enter the first number\n'))
b = int(input('enter the second number\n'))
c = int(input('enter the third number\n'))
if b == 0:
    print('you can\'t divide by zero')
else:
    print('YES, a % b == c' if a % b == c else 'NO, a % b != c')
    print('YES, a / b == c' if a / b == c else 'NO, a / b != c')
+import re
+import os
+import csv
def printing(d1, d2, arr):
    """Write the three result files (cp1251) into the current directory.

    output1.txt: "key<TAB>value" per entry of *d1*, sorted by key.
    output2.csv: a ';'-separated table with a header row and one row per entry
    of *d2*, whose values are indexed with [0] and [1].
    output3.txt: every element of *arr* on its own line.
    """
    with open('output1.txt', 'w', encoding='cp1251') as counts_file:
        for key, value in sorted(d1.items()):
            counts_file.write(key + '\t' + str(value) + '\n')
    # newline='' is what the csv module requires; otherwise rows get an extra
    # blank line on platforms that translate line endings.
    with open('output2.csv', 'w', encoding='cp1251', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow(['Название файла', 'Автор', 'Дата создания текста'])
        for key, value in sorted(d2.items()):
            writer.writerow([str(key), str(value[0]), str(value[1])])
    with open('output3.txt', 'w', encoding='cp1251') as lines_file:
        for elem in arr:
            lines_file.write(elem + '\n')
+def dictionary(name):
+ d1 = {}
+ d2 = {}
+ arr = []
+ for file in os.listdir(name):
+ with open(os.path.join(name, file), 'r', encoding = 'cp1251') as text:
+ text = text.read()
+ a = re.findall('(.*?)', text)
+ d1[file] = len(a)
+ b = re.findall('<.*?>(.*?)([\s,.!123456790:;?""])', text)
+ words = [words_punct[i][0] for i in range(len(words_punct))]
+ puncts = [words_punct[i][1] for i in range(len(words_punct))]
+ d = re.findall('gr="A.*?gen.*?>(.*?)\s.*?gr="S.*?gen.*?>(.*?)', text)
+ e = []
+ for i, elem in enumerate(d):
+ ind1 = words.index(elem[0])
+ ind2 = words.index(elem[1])
+ if ind2 - ind1 == 1:
+ t = ind1 - 1
+ while t >= 0 and puncts[t] not in '[.?!]':
+ t -= 1
+ k = ind2
+ while k <= len(words) - 1 and puncts[k] not in '[.?!]':
+ k += 1
+ s = ''
+ for p in range(t + 1, k):
+ if p != ind1 and p != ind2:
+ s += words[p] + puncts[p]
+ elif p == ind1:
+ s += '\t' + words[p] + puncts[p]
+ else:
+ s += words[p] + puncts[p] + 't'
+ e.append(s)
+ arr.extend(e)
+ return d1, d2, arr
+def main():
+ d1, d2, arr = dictionary('news')
+ printing(d1, d2, arr)
+if __name__ == '__main__':
+ main()
+
+import re
+def reading():
+ f = open('input.txt', 'r', encoding = 'utf-8')
+
+ text = f.read()
+ f.close()
+ text = text.replace('...', '.')
+
+ text = text.replace('—', '')
+
+
+ text = re.sub('[\.!\?]([а-яa-z])', ' \\1', text)
+ text = re.sub('[\.!\?]\)?»? ?«?\(?([а-яa-z])', ' \\1', text)
+
+
+ text = re.sub('\.([A-ZА-Я])', ' \\1', text)
+
+ text = re.sub('([A-ZА-Я])\. ([A-ZА-Я])', '\\1 \\2', text)
+
+
+
+ sentences = re.split(r'[.!?]', text)
+
+
+ sentences = [' '.join([word.strip('» «\n:<>\'"@
+
+ return sentences
def output(sentences):
    """Append to output.txt a line for every sentence longer than ten words.

    Each line quotes the sentence and gives its mean word length to one decimal.
    """
    with open('output.txt', 'a', encoding='utf-8') as report:
        for sentence in sentences:
            words = sentence.split()  # split once instead of three times
            if len(words) > 10:
                mean_length = sum(len(word) for word in words) / len(words)
                report.write('"{}": это предложение со словами длины {:.1f}\n'.format(sentence, mean_length))
+def main():
+ sentences = reading()
+ output(sentences)
+if __name__ == '__main__':
+ main()
# Read seven integers, then draw one row of that many 'X' characters per number.
print('Введите 7 целых чисел')
arr = []
for position in range(1, 8):
    print('Введите ', position, '-ое целое число', sep='')
    arr.append(int(input()))
for amount in arr:
    # A non-positive amount gives an empty row, as with range(amount).
    print('X' * amount)
+
+import re
def reading(name):
    """Return the full contents of the UTF-8 text file *name*."""
    # The context manager closes the file even if reading fails.
    with open(name, 'r', encoding='utf-8') as source:
        return source.read()
def correction(text):
    """Replace the matched forms of 'Финляндия' with the same forms of 'Малайзия'.

    Capitalised, lower-case and all-caps spellings keep their casing.
    """
    def _swap_initial(match):
        # Pick the new first letter from the matched one. The original wrote a '@'
        # marker and replaced it afterwards, which also rewrote any 'Ф@'/'ф@'
        # already present in the text.
        initial = 'М' if match.group(1) == 'Ф' else 'м'
        return initial + 'алайзи' + match.group(2)

    corrected_text = re.sub('(Ф|ф)инлянди( |я(х|(ми?))?|и|й|ю|е(й|ю))', _swap_initial, text)
    corrected_text = re.sub('ФИНЛЯНДИ( |Я(Х|(МИ?))?|И|Й|Ю|Е(Й|Ю))', 'МАЛАЙЗИ\\1', corrected_text)
    return corrected_text
def recording(text):
    """Ask for an output file name and write *text* to it as UTF-8."""
    # The context manager closes (and flushes) the file even if the write fails.
    with open(input('Введите, пожалуйста, название файла вывода:\n'), 'w', encoding='utf-8') as target:
        target.write(text)
+def main():
+ text = reading(input('Введите, пожалуйста, название файла ввода:\n'))
+ corrected_text = correction(text)
+ recording(corrected_text)
+if __name__ == '__main__':
+ main()
+import re
def open_text(way_to_file):
    """Return the whole contents of the UTF-8 file at *way_to_file*."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        return source.read()
def search(text):
    """Return re.findall's group tuples for every inflected 'динозавр…' form found in *text*."""
    pattern = re.compile('\\bдинозавр(a(ми|х)?|у|о(м|в)|е|ы)')
    return pattern.findall(text)
def tags(text):
    """Return *text* with every '<...>' span removed (a span may cross line breaks)."""
    tag_pattern = re.compile('<.*?>', flags=re.DOTALL)
    return tag_pattern.sub('', text)
def replace(text):
    """Return *text* with every word-initial 'динозавр' replaced by 'кот'."""
    stem_pattern = re.compile('\\bдинозавр', flags=re.DOTALL)
    return stem_pattern.sub('кот', text)
+def images(text):
+ n = re.sub('(а|е|ё|и|оуэюя)')
+fname = input()
+txt = open_text(fname)
+res = replace(txt)
+print(res)
+import re
+import os
def auth(direct):
    """Map each file name under *direct* to the value of its 'author' meta tag.

    Files without a match are absent from the result. If a file has several
    matches, the last one wins.
    """
    d = {}
    for root, dirs, files in os.walk(direct):
        for file in files:
            # Join with root, not direct: os.walk also yields files in
            # subdirectories, which could not be opened before.
            with open(os.path.join(root, file)) as f:
                text = f.read()
            for elem in re.findall('content=".*" name="author"', text):
                value = re.sub('content="', '', elem)
                d[file] = re.sub('" name="author"', '', value)
    return d
def topic(direct):
    """Map each file name under *direct* to the value of its 'topic' meta tag.

    Files without a match are absent from the result. If a file has several
    matches, the last one wins.
    """
    d = {}
    for root, dirs, files in os.walk(direct):
        for file in files:
            # Join with root, not direct: os.walk also yields files in
            # subdirectories, which could not be opened before.
            with open(os.path.join(root, file)) as f:
                text = f.read()
            for elem in re.findall('content=".*" name="topic"', text):
                value = re.sub('content="', '', elem)
                d[file] = re.sub('" name="topic"', '', value)
    return d
def main():
    """Write './15.csv' with one "file author topic" line per file that has an author."""
    direct = './news'
    d1 = auth(direct)
    d2 = topic(direct)
    with open('./15.csv', 'w', encoding='utf-8') as f:
        for key in d1:
            # A file may have an author but no topic; write an empty topic
            # instead of raising KeyError.
            f.write('\n{} {} {}'.format(key, d1[key], d2.get(key, '')))
+main()
def open_text(way_to_file):
    """Return the lower-cased tokens of a UTF-8 file, stripped of surrounding ',.;:!?' and whitespace."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        lowered = source.read().lower()
    return [token.strip(',.;:!?\n ') for token in lowered.split()]
def first_letter(letter, way_to_file):
    """Return the tokens of the file at *way_to_file* whose first character is *letter*."""
    # elem[:1] is '' for an empty token (e.g. '...' after stripping), where
    # elem[0] raised IndexError.
    return [elem for elem in open_text(way_to_file) if elem[:1] == letter]
+def questions():
+ letter = input()
+ fname = input()
+ result = first_letter(letter, fname)
+ return result
+result = questions()
+print(result)
+import re
+import os
def number_sent(direct):
    """Map each file name under *direct* to the number of matches of the pattern in its text."""
    d = {}
    # NOTE(review): the pattern literal is empty here, so the count is
    # len(text) + 1; it looks like markup was lost from it — confirm.
    regex = ''
    for root, dirs, files in os.walk(direct):
        for file in files:
            # Join with root, not direct, so files in subdirectories can be opened.
            with open(os.path.join(root, file)) as f:
                text = f.read()
            d[file] = len(re.findall(regex, text))
    return d
+def main():
+ direct = './news'
+ d = number_sent(direct)
+ with open('./11.txt', 'w', encoding='utf-8') as f:
+ for key in d.keys():
+ f.write('\n{} {}'.format(key, d[key]))
+main()
+
+import random
+
def imperative():
    """Return one randomly chosen imperative phrase."""
    return random.choice(
        ["прокати", "уходи", "не спеши", "погоди", "подожди", "позвони", "убегай", "не плошай", "подержи"]
    )
def verb():
    """Return one randomly chosen plural verb form."""
    return random.choice(
        ["привезут", "принесли", "принесут", "пожуют", "погрызут", "приплетут", "приведут", "привели"]
    )
def noun_phrase():
    """Return a random clitic followed by a space and a random word from the second list."""
    # The clitic is drawn before the word, as in the original.
    first = random.choice(["по", "ни", "на", "хоть", "лишь", "вот", "не", "от", "за", "пусть"])
    second = random.choice(
        ["себе", "тебе", "земля", "игра", "звезда", "мороз", "ответ", "превед", "футбол", "печаль", "бокал"]
    )
    return ' '.join((first, second))
def noun(number):
    """Return a random word from the singular list when *number* is 's', otherwise from the plural list."""
    singular_nouns = ["монолог", "коридор", "почему", "потому", "отчего", "каратэ", "кабарэ", "курага", "кандидат"]
    plural_nouns = ["малыши", "рукава", "камыши", "табуны", "рюкзаки", "пиджаки", "пацаны", "чуваки"]
    pool = singular_nouns if number == 's' else plural_nouns
    return random.choice(pool)
def punctuation():
    """Return one randomly chosen closing mark: '.', '?', '!' or '...'."""
    return random.choice([".", "?", "!", "..."])
def verse1():
    """Return a line of the form '<plural noun> <verb> <plural noun><mark>'."""
    words = [noun('pl'), verb(), noun('pl')]
    return ' '.join(words) + punctuation()
def verse2():
    """Return a line of the form '<imperative> <singular noun> <noun phrase><mark>'."""
    words = [imperative(), noun('s'), noun_phrase()]
    return ' '.join(words) + punctuation()
def verse3():
    """Return a line of the form '<noun phrase> <verb> <plural noun><mark>'."""
    words = [noun_phrase(), verb(), noun('pl')]
    return ' '.join(words) + punctuation()
def make_verse():
    """Return a line built by verse1, verse2 or verse3, picked at random."""
    builders = {1: verse1, 2: verse2, 3: verse3}
    return builders[random.choice([1, 2, 3])]()
+
+
+for n in range(4):
+ print(make_verse())
+
# Print the letters at odd positions (1st, 3rd, ...) of the entered word that occur in 'пое'.
word = input()
for elem in word[::2]:
    if elem in 'пое':
        print(elem)
+message=input('Введите слово или сообщение: ')
+result=''
+for letter in message:
+ result += letter
+ print(result)
+import re
def open_text(way_to_file):
    """Return the lower-cased tokens of a UTF-8 file, stripped of surrounding ',.;:!?' and whitespace."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        tokens = source.read().lower().split()
    return [token.strip(',.;:!?\n ') for token in tokens]
def main():
    """Read a file name from stdin and return the distinct tokens of that file that match the 'на…' pattern, in first-seen order."""
    reglex = 'на(й(ти|д(я|ут?|((е|ё)(шь|т|м|те)|ен(а|о|ы)?)))|ш((е|ё)л|л(а|о|и))|шедш(е(е|й|го|му?)|ая|ую|и(й|е|х|ми?))|йденн(о(е|го|ому?)|ая|ой|ую|ы(й|е|х|ми?))(с(ь|я))?)'
    found = []
    for token in open_text(input()):
        if re.search(reglex, token) is None:
            continue
        if token not in found:
            found.append(token)
    return found
+result = main()
+print(result)
+
+import random
def organising_array(way_to_file):
    """Return the lines of the UTF-8 file *way_to_file* as a list, split on '\\n'."""
    # The original never closed the handle, and this helper is called for every
    # generated word.
    with open(way_to_file, 'r', encoding='utf-8') as f:
        return f.read().split('\n')
def noun_phrase():
    """Return a random line of ./1.txt and a random line of ./2.txt joined by a space."""
    first = random.choice(organising_array('./1.txt'))
    second = random.choice(organising_array('./2.txt'))
    return ' '.join((first, second))
def clause():
    """Return a random line of ./3.txt."""
    return random.choice(organising_array('./3.txt'))
def adverb():
    """Return a random line of ./4.txt."""
    return random.choice(organising_array('./4.txt'))
def clause2():
    """Return random lines of ./5.txt, ./6.txt and ./7.txt joined by spaces."""
    # Each file is read and sampled in turn, in this order.
    picks = [random.choice(organising_array(path)) for path in ('./5.txt', './6.txt', './7.txt')]
    return ' '.join(picks)
def objects():
    """Return a random line of ./8.txt."""
    return random.choice(organising_array('./8.txt'))
def patient():
    """Return a random line of ./9.txt."""
    return random.choice(organising_array('./9.txt'))
def verb():
    """Return a random line of ./10.txt."""
    return random.choice(organising_array('./10.txt'))
def praep_phrase():
    """Return a random line of ./11.txt and a random line of ./12.txt joined by a space."""
    first = random.choice(organising_array('./11.txt'))
    second = random.choice(organising_array('./12.txt'))
    return ' '.join((first, second))
def adjective():
    """Return a random line of ./13.txt."""
    return random.choice(organising_array('./13.txt'))
def punctuation():
    """Return a random line of ./14.txt."""
    return random.choice(organising_array('./14.txt'))
def verse1():
    """Return '<noun phrase><mark> <clause><mark>'."""
    opening_part = noun_phrase() + punctuation()
    closing_part = clause() + punctuation()
    return opening_part + ' ' + closing_part
def verse2():
    """Return '<adverb> <clause2> <objects><mark>'."""
    words = [adverb(), clause2(), objects()]
    return ' '.join(words) + punctuation()
def verse3():
    """Return '<patient> <verb> <prepositional phrase> <adjective><mark>'."""
    words = [patient(), verb(), praep_phrase(), adjective()]
    return ' '.join(words) + punctuation()
def make_verse():
    """Return a line built by verse1, verse2 or verse3, picked at random."""
    builders = {1: verse1, 2: verse2, 3: verse3}
    return builders[random.choice([1, 2, 3])]()
+for n in range(4):
+ print(make_verse())
+import re
def open_text(way):
    """Return the text of the UTF-8 file *way* with the characters , . : ; ! ? - " ( ) removed."""
    with open(way, 'r', encoding='utf-8') as source:
        raw = source.read()
    # One translate pass deletes the same ten characters as the chained replaces.
    return raw.translate(str.maketrans('', '', ',.:;!?-"()'))
def search(text):
    """Return every run of seven space-separated words in *text* whose fourth word ends in 'аго'."""
    window = re.compile(
        '\\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+аго\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b',
        flags=re.DOTALL,
    )
    return window.findall(text)
def write(fname, m):
    """Append every element of *m* to the UTF-8 file *fname*, one per line."""
    with open(fname, 'a', encoding='utf-8') as target:
        for entry in m:
            target.write(entry + '\n')
+way = input()
+fname = input()
+text = open_text(way)
+m = search(text)
+write(fname, m)
def open_text(way):
    """Return the lower-cased tokens of a UTF-8 file, stripped of surrounding ',.;:!?-'."""
    with open(way, 'r', encoding='utf-8') as source:
        tokens = source.read().lower().split()
    return [token.strip(',.;:!?-') for token in tokens]
+def main():
+ fname = input()
+ arr = open_text(fname)
+ n = len(arr)
+ return n
+res = main()
+print('В файле содержится ', res, ' слов')
def open_text(way):
    """Return the lower-cased tokens of a UTF-8 file, stripped of surrounding ',.;:!?-"'."""
    with open(way, 'r', encoding='utf-8') as source:
        tokens = source.read().lower().split()
    return [token.strip(',.;:!?-"') for token in tokens]
def freq_list(arr):
    """Return a dict mapping each element of *arr* to its number of occurrences."""
    counts = {}
    for token in arr:
        counts[token] = counts.get(token, 0) + 1
    return counts
def sort(d):
    """Return the keys of *d* as a list in ascending order."""
    # The hand-written swapping loop overwrote slots of its working list and so
    # lost keys (keys 'b', 'c', 'a' came back as ['a', 'b']). The built-in sort
    # keeps every key.
    return sorted(d)
def write(fname, d, arr):
    """Append a 'key,count' line to the UTF-8 file *fname* for every key in *arr*, looking counts up in *d*."""
    with open(fname, 'a', encoding='utf-8') as target:
        for key in arr:
            target.write(key + ',' + str(d[key]) + '\n')
+fname = input()
+fname2 = input()
+arr = open_text(fname)
+d = freq_list(arr)
+a = sort(d)
+write(fname2, d, a)
+import re
def open_text(way):
    """Return the words of the UTF-8 file *way* with the punctuation , . : ; ! ? - " ( ) removed."""
    with open(way, 'r', encoding='utf-8') as f:
        text = f.read()
    # The original pattern was a literal character sequence, not a character
    # class, so it never matched and nothing was removed. Newlines are left in
    # place: split() handles them, and deleting them would glue together the
    # words on either side of a line break.
    text = re.sub(r'[,.:;!?\-"()]', '', text)
    return text.split()
def search(arr):
    """Return every window of seven consecutive words of *arr* (joined by spaces) that matches the pattern.

    The pattern requires the fourth word of the window to end in 'аго'.
    """
    result = []
    regex = re.compile('\\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+аго\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b')
    # There are len(arr) - 6 windows of seven words; the original stopped one
    # short and never examined the last window.
    for i in range(len(arr) - 6):
        current = ' '.join(arr[i:i + 7])
        if regex.search(current) is not None:
            result.append(current)
    return result
def write(fname, m):
    """Append every element of *m* to the UTF-8 file *fname*, one per line."""
    with open(fname, 'a', encoding='utf-8') as target:
        for entry in m:
            target.write(entry + '\n')
+way = input()
+fname = input()
+text = open_text(way)
+m = search(text)
+write(fname, m)
+import random
def opening_csv(way_to_file):
    """Read a comma-separated UTF-8 file and map the first field of each line to the second."""
    with open(way_to_file, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')
    d = {}
    for line in lines:
        fields = line.split(',')
        if len(fields) < 2:
            # A blank line (typically the one after the final newline) or a line
            # without a comma used to raise IndexError.
            continue
        d[fields[0]] = fields[1]
    return d
def random_key(d):
    """Return one randomly chosen key of *d*."""
    return random.choice(list(d.keys()))
def main():
    """Play one guessing round and return 'WIN!!!' or 'FAIL((('.

    Reads a CSV path from stdin, prints a random key and one '_ ' per
    character of its value, then compares the typed answer with that value.
    """
    vocabul = opening_csv(input())
    word = random_key(vocabul)
    print(word)
    # Same output as printing '_' with end=' ' once per character.
    print('_ ' * len(vocabul[word]), end='')
    solve = input()
    return 'WIN!!!' if solve == vocabul[word] else 'FAIL((('
+res = main()
+print(res)
+import re
def open_text(way_to_file):
    """Return the whole contents of the UTF-8 file at *way_to_file*."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        return source.read()
def search(text):
    """Strip '<...>' spans and newlines from *text*, then return all 'Часовой поясUTC…' matches."""
    without_tags = re.compile('<.*?>', flags=re.DOTALL).sub('', text)
    flattened = without_tags.replace('\n', '')
    return re.findall('Часовой поясUTC.?[0-9]', flattened)
def write(arr, way_to_file2):
    """Append every element of *arr* to the UTF-8 file *way_to_file2*.

    Returns the number of characters written by the last write call, or 0 when
    *arr* is empty.
    """
    newtext = 0  # an empty arr used to leave this unbound and raise UnboundLocalError
    with open(way_to_file2, 'a', encoding='utf-8') as f:
        for elem in arr:
            newtext = f.write(elem)
    return newtext
+def main():
+ fname1 = input()
+ fname2 = input()
+ t = open_text(fname1)
+ txt = search(t)
+ res = write(txt, fname2)
+ return res
+a = main()
+import os
+import re
def no_numbers():
    """Count entries of the current directory whose name has a '.' and none of the digits 0-9."""
    digits = set('1234567890')
    matching = [
        entry for entry in os.listdir('.')
        if not (digits & set(entry)) and '.' in entry
    ]
    return len(matching)
def no_repet():
    """Return the distinct names in the current directory with everything from the first '.' removed."""
    stems = []
    for entry in os.listdir('.'):
        stem = re.sub('\..*', '', entry)
        if stem in stems:
            continue
        stems.append(stem)
    return stems
+print('Количество файлов без цифр в названии равно', no_numbers())
+print('Найдены следующие файлы и папки (без повторов):', no_repet())
# Collect words until an empty line is entered, then print those longer than five characters.
a = []
while True:
    word = input()
    if not word:
        break
    a.append(word)
for el in a:
    if len(el) > 5:
        print(el)
+import re
def open_text(way_to_file):
    """Return the whole contents of the UTF-8 file at *way_to_file*."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        return source.read()
def replace1(txt):
    """Replace lower-case forms of 'викинг' in *txt* with 'бурундук' plus the same ending."""
    # In a replacement template '\b' means a backspace character, not a word
    # boundary, so the original inserted '\x08' before every replacement.
    m = re.sub(r'\bвикинг(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'бурундук\1', txt, flags=re.DOTALL)
    return m
def replace2(txt):
    """Replace capitalised forms of 'Викинг' in *txt* with 'Бурундук' plus the same ending."""
    # In a replacement template '\b' means a backspace character, not a word
    # boundary, so the original inserted '\x08' before every replacement.
    n = re.sub(r'\bВикинг(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'Бурундук\1', txt, flags=re.DOTALL)
    return n
def write(txt, way_to_file2):
    """Overwrite the UTF-8 file *way_to_file2* with *txt* and return what the write call returns."""
    with open(way_to_file2, 'w', encoding='utf-8') as target:
        return target.write(txt)
+def main():
+ fname1 = input()
+ fname2 = input()
+ txt = open_text(fname1)
+ r = replace1(txt)
+ res = replace2(r)
+ result = write(res, fname2)
+ return result
+a = main()
+import os
+import re
def dir_choose_kyr(dir_name):
    """Print and count the directories under *dir_name* named only with Cyrillic letters and spaces.

    The tree is walked recursively; the list of matching names is printed and
    its length returned.
    """
    # '|' inside a character class is a literal pipe, so the original class also
    # accepted names containing '|'. List only the intended characters.
    cyrillic_name = re.compile('[А-ЯЁа-яё ]*')
    arr = []
    for root, dirs, files in os.walk(dir_name):
        for elem in dirs:
            if cyrillic_name.fullmatch(elem):
                arr.append(elem)
    print(arr)
    return len(arr)
+def main():
+ dir_name = '.'
+ n = dir_choose_kyr(dir_name)
+ print('В папке найдено ', n, ' папок с полностью кириллическими названиями (допускаются пробелы между словами)')
+ return n
+main()
# Compare the longest and the shortest line of a text file.
way = input('Введите путь к файлу без дополнительных символов: ', )
with open(way, 'r', encoding='utf-8') as f:
    text = f.read()
arr = text.split('\n')
if arr and arr[-1] == '':
    # A final newline yields an empty last piece that is not a real line.
    arr.pop()
# Named so that the built-in min()/max() are no longer shadowed.
line_lengths = [len(el) for el in arr]
shortest = min(line_lengths, default=0)
longest = max(line_lengths, default=0)
if shortest == 0:
    # Guards the division that used to raise ZeroDivisionError.
    print('Отношение длин посчитать нельзя: в файле нет строк или есть пустая строка')
else:
    k = longest / shortest
    print('Самая короткая строка короче самой длинной в ', k, ' раз(а)')
def open_text(way_to_file):
    """Return the lower-cased tokens of a UTF-8 file, stripped of surrounding ',.;:!?' and whitespace."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        tokens = source.read().lower().split()
    return [token.strip(',.;:!?\n ') for token in tokens]
def finding_suffix(suffix, way_to_file):
    """Return, in file order and with repeats, the tokens of the file that end with *suffix*."""
    return [token for token in open_text(way_to_file) if token.endswith(suffix)]
def one_word_once(array):
    """Return the elements of *array* without repeats, keeping first-occurrence order."""
    unique = []
    for item in array:
        if item in unique:
            continue
        unique.append(item)
    return unique
def func(array):
    """Return every element of *array* that is a repeat of an earlier one, in order."""
    seen = []
    repeats = []
    for item in array:
        # First sighting goes to seen; any later sighting goes to repeats.
        (repeats if item in seen else seen).append(item)
    return repeats
def count_freq(array):
    """Peel off one occurrence of every value with func() until nothing repeats; return the last non-empty list."""
    result = array
    for _ in array:  # at most len(array) rounds
        repeated = func(result)
        if not repeated:
            break
        result = repeated
    return result
+fname = input()
+suffix = 'ness'
+arr = finding_suffix(suffix, fname)
+array = one_word_once(arr)
+print('В тексте имеются следующие слова с суффиксом ', suffix, ':')
+for elem in array:
+ print(elem)
+max_freq = count_freq(arr)
+print('Макс. частоту имеет(-ют) слово(-а):', max_freq)
+import os,re
def counting_sentences(file):
    """Return the number of matches of the pattern in the string *file*."""
    # NOTE(review): the pattern literal is empty here, so the result is
    # len(file) + 1; it looks like markup was lost from it — confirm.
    matches = re.compile('').findall(file)
    return len(matches)
def opening_folders(folder):
    """Map each file name in *folder* to counting_sentences() of its text."""
    dic = {}
    for file in os.listdir(folder):
        with open(os.path.join(folder, file)) as source:
            dic[file] = int(counting_sentences(source.read()))
    return dic
def writing_table(dic):
    """Write 'name<TAB>count' lines for every item of *dic* to number_of-sentences.txt (UTF-8)."""
    with open('number_of-sentences.txt', 'w', encoding='utf-8') as table:
        for name, count in dic.items():
            table.write(name + '\t' + str(count) + '\n')
def author_and_topic(folder):
    """Return {file name: (author, topic)} for every file in *folder*.

    The values come from 'content="…" name="author"' and
    'content="…" name="topic"'. The last match in a file wins; None marks a
    missing value. The original extracted the same values into locals and
    discarded them, always returning None.
    """
    reg1 = '(content="(.*)" name="author")'
    reg2 = '(content="(.*)" name="topic")'
    found = {}
    for file in os.listdir(folder):
        with open(os.path.join(folder, file)) as f:
            text = f.read()
        author = None
        topic = None
        for match in re.findall(reg1, text):
            author = match[1]
        for match in re.findall(reg2, text):
            topic = match[1]
        found[file] = (author, topic)
    return found
+writing_table(opening_folders(r'C:\Users/student/Desktop/news/'))
+author_and_topic(r'C:\Users/student/Desktop/news/')
+
+import os
+number = 0
+for roots, dirs, files in os.walk('.'):
+ names = []
+ for f in files:
+ name = f[::-1].split('.')[0]
+ if name not in names:
+ names.append(name)
+ else:
+ number += 1
+ break
+print(number)
+import re
+m1 = 'загруж(у|(енн?(ы(й|е|х|ми?)?|о(го|му?|е|й)?|ую|а)?))(с(я|ь))?'
+m2 = 'загруз(и(т|шь|м|т(ь|е)?|л(а|о|и)?)?(в(ш(и(й|ми?|х)|е(го|му?|е|й)))?)?|ят)(с(я|ь))?'
+with open (r"C:\Users\Анна\Documents\GitHub\prog\PythonHW9\re.txt",'r', encoding='utf-8') as f:
+ mas = []
+ for line in f:
+ words = line.split()
+ for word in words:
+ word = word.strip(',.;"()-!?')
+ mas.append(word.lower())
+arr = []
+for i in mas:
+ a = re.search(m1,i)
+ b = re.search(m2,i)
+ if a != None and len(a.group()) == len(i):
+ if a.group() not in arr:
+ arr.append(a.group())
+ print(a.group())
+ if b != None and len(b.group()) == len(i):
+ if b.group() not in arr:
+ arr.append(b.group())
+ print(b.group())
+
# Print the multiplication table (1..10) for an entered integer.
print ('Введите любое число')
a = int(input())
for i in range(1, 11):
    print(i, '*', a, '=', (i * a), ';')
print ('Цикл завершен')
def opening(name):
    """Count the lines of the UTF-8 file *name* that come before the first line equal to ' \\n'."""
    count = 0
    with open(name, 'r', encoding='utf-8') as source:
        for line in source:
            if line == ' \n':
                break
            count += 1
    return count
+def writing():
+ count = opening(name)
+ with open('2.txt', 'w', encoding = 'utf-8') as f:
+ f.write(str(count))
+name = r'C:\Users\student\Desktop\1.xml'
+writing()
+import re
+with open (name, 'r', encoding = 'utf-8') as f:
+ content = f.read()
+ arr = re.findall(r'(.*?)', content)
+ d = {}
+ for i in arr:
+ d[i[1]] = content.count(i[1])
+with open ('3.txt', 'w', encoding = 'utf-8') as f:
+ for key in d:
+ a = str(key) + ' ' + str(d[key]) + '\n'
+ f.write(a)
+
+
+
def reading():
    """Return the lower-cased words of omni.txt with surrounding ',.;"()-!?' stripped."""
    with open (r"C:\Users\Анна\Documents\ФиКЛ\PythonHW7\omni.txt", 'r', encoding='utf8') as text:
        return [word.strip(',.;"()-!?').lower() for line in text for word in line.split()]
def omni_counting():
    """Print the number of words starting with 'omni', then the number of words equal to those words minus the prefix."""
    # Read the file once; the original re-read it for every distinct omni- word.
    words = reading()
    s = 0
    omni = []
    for word in words:
        if word[:4] == 'omni':
            s += 1
            if word not in omni:
                omni.append(word)
    print(s, 'words with OMNI-')
    p = 0
    # omni already holds distinct words, so the stems are distinct too. The
    # original's per-iteration without_omni list was reset each time and never
    # filtered anything.
    for word in omni:
        p += words.count(word[4:])
    print(p, 'words without OMNI-')
+omni_counting()
+
+import os, re
def folder_opening(big_folder):
    """Count the files in *big_folder* whose reversed name matches the punctuation pattern."""
    # NOTE(review): inside the class, !-( is a character range (! " # $ % & ' ()
    # and the adjacent quotes are Python string concatenation — confirm this is
    # the intended set.
    pattern = re.compile('[^.]*\..*?[,._?<>''""!-()].*?')
    # isfile() must see the path inside big_folder; the bare name was resolved
    # against the current directory, so any other folder counted zero files.
    names = [
        item for item in os.listdir(big_folder)
        if os.path.isfile(os.path.join(big_folder, item)) and pattern.search(item[::-1])
    ]
    return len(names)
+print('Найдено',folder_opening('.'), 'файлов, название которых содержит знаки препинания')
def all_files(big_folder):
    """Return the distinct names in *big_folder*: file names without their last extension first, then directory names."""
    entries = os.listdir(big_folder)
    names = []
    for item in entries:
        # Test the path inside big_folder; the bare name was resolved against
        # the current directory.
        if os.path.isfile(os.path.join(big_folder, item)):
            # Work on the reversed name so that only the last '.ext' is dropped.
            names.append(re.sub(u'([^.]*\.)?(.*)', u'\\2', item[::-1])[::-1])
    for item in entries:
        if os.path.isdir(os.path.join(big_folder, item)):
            names.append(item)
    all_files_new = []
    for item in names:
        if item not in all_files_new:
            all_files_new.append(item)
    return all_files_new
+print('Все файлы:', all_files('.'))
+
# Report how many words of the text start with an uppercase letter.
with open(r'C:\Users\Анна\Documents\GitHub\prog\PythonHW5\text.txt','r',encoding='utf8') as f:
    words = f.read().split()
total = len(words)
# Count words whose FIRST character is uppercase; the original counted every
# uppercase letter in every word.
upletters = sum(1 for item in words if item[0].isupper())
# Scale to a real percentage and avoid dividing by zero on an empty file.
percent = upletters / total * 100 if total else 0
print("Количество слов в тексте: ",total)
print('Количество слов с заглавной буквы',upletters)
print("Процент слов в тексте, начинающихся с заглавной буквы: ", percent,'%')
+
+
+
+
+
+
+
# Read seven integers and print a row of that many 'X' per number (an empty row for non-positive ones).
arr = []
for num in range(1, 8):
    chislo = int(input('Vvedite chisclo'))
    arr.append('X' * chislo if chislo > 0 else '')
for row in arr:
    print(row)
def opening(file):
    """Return the pieces of the UTF-8 file *file* obtained by splitting on '.', '!' and '?'."""
    with open(file, 'r', encoding='utf-8') as source:
        text = source.read()
    return [
        piece
        for by_dot in text.split('.')
        for by_excl in by_dot.split('!')
        for piece in by_excl.split('?')
    ]
+for sentence in opening(r"C:\Users\Анна\Documents\GitHub\prog\PythonHW12\text.txt"):
+ words = sentence.split()
+ new_words = [word.strip('.,!?/-;:''""«»—()') for word in words if len(words) > 10]
+ print(new_words)
+ lenght = 0
+ for word in new_words:
+ lenght += len(word)
+ if new_words:
+ template = 'Это предложение со словами длины {:.1f}'
+ print (template.format(lenght/len(new_words)))
# Read A, B, C; report whether A + B == C and whether C solves a*x + b = 0.
print ('Введите три числа A,B и C')
a = int(input ())
b = int(input ())
c = int(input ())
print ("A =",a)
print ("B =",b)
print ("C =",c)
print ('A+B =',(a+b))
print ('A*C + B =',(a*c+b))
# The check is a*c + b == 0, so every message names the equation ax+b=0;
# three of the four used to say ax+b=c.
if (a+b) == c:
    if (a*c)+b==0:
        print ('сумма чисел A и B равна С и число С является решением квадратного уравнения ax+b=0')
    else:
        print ('сумма чисел A и B равна С, но число С не является решением квадратного уравнения ax+b=0')
elif (a*c)+b == 0:
    print ('сумма чисел A и B не равна С, но число С является решением квадратного уравнения ax+b=0')
else:
    print ('сумма чисел A и B не равна С и число С не является решением квадратного уравнения ax+b=0')
+
# Print every suffix of the entered word, longest first.
word = input('Vvedite slovo')
for offset, _ in enumerate(word):
    print (word[offset:])
+with open (r"C:\Users\Анна\Documents\GitHub\prog\PythonHW8\words.csv", 'r', encoding = 'utf-8') as text:
+ mas = []
+ for line in text:
+ words = line.split(',')
+ for word in words:
+ mas.append(word)
+words = {}
+for i in mas:
+ word = i.split(';')
+ words[word[0].strip()] = word[1].strip()
+for key in words:
+ print(key, '...')
+ p = 3
+ for i in range (3):
+ if input() != words[key]:
+ p -= 1
+ print('Осталось', p, 'попыток')
+ if p == 0:
+ print('Вы не угадали слово')
+ else:
+ print ('Ура! Вы угадали слово!')
+ break
+
+
+
+
+import random
def nouns():
    """Return a random whitespace-separated word from nouns.txt."""
    # f.close() sat after return and never ran; the context manager closes the file.
    with open (r'D:\Desktop\Аня\sentence_generator\nouns.txt','r', encoding = 'UTF-8') as f:
        arr = f.read().split()
    return random.choice(arr)
def adjectives():
    """Return a random word from adjectives.txt followed by a space and a random noun."""
    # The file was never closed; the context manager closes it.
    with open (r'D:\Desktop\Аня\sentence_generator\adjectives.txt','r', encoding = 'UTF-8') as f:
        arr = f.read().split()
    return random.choice(arr) + ' ' + nouns ()
def verbs():
    """Return a random whitespace-separated word from verbs.txt."""
    # f.close() sat after return and never ran; the context manager closes the file.
    with open (r'D:\Desktop\Аня\sentence_generator\verbs.txt','r', encoding = 'UTF-8') as f:
        arr = f.read().split()
    return random.choice(arr)
def adverbs():
    """Return a random whitespace-separated word from adverbs.txt."""
    # f.close() sat after return and never ran; the context manager closes the file.
    with open (r'D:\Desktop\Аня\sentence_generator\adverbs.txt','r', encoding = 'UTF-8') as f:
        arr = f.read().split()
    return random.choice(arr)
def assertion():
    """Return '<adjective phrase> <verb>t une <adjective phrase> <adverb>'."""
    parts = [adjectives(), verbs() + 't', 'une ' + adjectives(), adverbs()]
    return ' '.join(parts)
def sentence():
    """Return an assertion wrapped as 'La <assertion>.'."""
    body = assertion()
    return 'La ' + body + '.'
def negation():
    """Return 'La <adjective phrase> ne <verb>t pas une <adjective phrase> <adverb>.'."""
    parts = ['La ' + adjectives(), 'ne', verbs() + 't', 'pas une ' + adjectives(), adverbs()]
    return ' '.join(parts) + '.'
def question():
    """Return 'La <adjective phrase> <verb>t-elle une <adjective phrase> <adverb>?'."""
    parts = ['La ' + adjectives(), verbs() + 't-elle', 'une ' + adjectives(), adverbs()]
    return ' '.join(parts) + '?'
def conditions():
    """Return 'Si <assertion>, <assertion>.' built from two independent assertions."""
    condition = assertion()
    consequence = assertion()
    return 'Si ' + condition + ', ' + consequence + '.'
def imperative():
    """Return '<Verb>s une <adjective phrase> <adverb>!' with the verb capitalised."""
    command = str(verbs()).capitalize() + 's'
    parts = [command, 'une ' + adjectives(), adverbs()]
    return ' '.join(parts) + '!'
+mas = [(sentence()),(negation()),(question()),(conditions ()),(imperative ())]
+mass = []
+for i in range (len(mas)):
+ for item in mas:
+ randitem = random.choice(mas)
+ if randitem not in mass:
+ mass.append(randitem)
+ print(randitem)
def content(name):
    """Return the whole contents of the UTF-8 file *name*."""
    with open(name, 'r', encoding='utf-8') as source:
        return source.read()
+name = r"C:\Users\Анна\Documents\GitHub\prog\PythonHW10\Squirrels.html"
+import re
+reg = u'( | Отряд: | \n(.*) | )'
+link = re.search(reg, content(name))
+link = ((re.search(('title="(.*)"'),link.group())).group()).strip('title="')
+print("Отряд", link)
+with open (r'C:\Users\Анна\Documents\GitHub\prog\PythonHW11\lingva.html', 'r', encoding = 'utf-8') as f:
+ content = f.read()
+import re
+article = re.sub(u'язык((а(х|ми?)?|у|о(м|в)|и|е)?[\s.,— ''""<>?!»():-;])', 'шашлык\\1', content)
+article2 = re.sub(u'Язык((а(х|ми?)?|у|о(м|в)|и|е)?[\s.,— ''""<>?»!():-;])', 'Шашлык\\1', article)
+with open ('new.txt', 'w', encoding='utf-8') as f:
+ f.write(article2)
def count_tf(word, text):
    """Return the number of occurrences of *word* in *text* divided by len(text)."""
    occurrences = text.count(word)
    return occurrences / len(text)
def count_df(word, texts):
    """Return how many of *texts* contain *word*."""
    return sum(1 for text in texts if word in text)
+
def count_idf(word, texts):
    """Return len(texts) divided by one plus the number of texts containing *word*."""
    containing = count_df(word, texts)
    return len(texts) / (1 + containing)
+
+from math import log
def count_tfidf(word, text, texts):
    """Return log10(tf) * log10(idf) for *word* in *text* against the collection *texts*."""
    # NOTE(review): the usual TF-IDF is tf * log(idf); this multiplies two
    # logarithms (and fails for tf == 0) — confirm the intended formula.
    term_frequency = count_tf(word, text)
    inverse_frequency = count_idf(word, texts)
    return log(term_frequency, 10) * log(inverse_frequency, 10)
+import re
+punct = '[.,!«»?&@"$\[\]\(\):;%
+def preprocessing(text):
+ text_wo_punct = re.sub(punct, '', text.lower())
+ words = text_wo_punct.strip().split()
+ return words
+import os
+texts_dic = {}
+for root, dirs, files in os.walk('wikipedia'):
+ for f in files[:50]:
+ with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
+ text = preprocessing(t.read())
+ texts_dic[f.split('.')[0]] = text
+texts = list(texts_dic.values())
+for text in texts_dic:
+ for word in texts_dic[text]:
+ scores = {}
+ scores[word] = count_tf(word, texts_dic[text])
+ if scores[word] >= 55:
+ texts_dic[text].pop(word)
+for text in texts_dic:
+ print("Top words in document {}".format(text))
+ scores = {}
+ for word in texts_dic[text]:
+ scores[word] = count_tfidf(word, texts_dic[text], texts)
+ sorted_words = sorted(scores.items(), key=lambda x: x[1])
+ for word, score in sorted_words[:5]:
+ print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
+import os
+import re
+import codecs
+f2 = open('table.csv', 'w', encoding = 'utf-8')
+f2.write('Название текста' + ',' + 'Автор' + ',' 'Дата создания текста')
+for file in os.listdir('.'):
+ if file.endswith('xhtml'):
+ with codecs.open(file, 'r', 'Windows-1251') as f:
+ a = f.read()
+ f.close()
+ r1 = re.search('(.*)', a)
+ r2 = re.search('', a)
+ r3 = re.search('', a)
+ if r1 and r2 and r3:
+ f2.write(r1.group(1) + ',' + r2.group(1) + ',' + r3.group(1))
+f2.close()
+
+import os
+import re
+import codecs
+f1 = open('file_words.txt', 'w', encoding = 'utf-8')
+for file in os.listdir('.'):
+ if file.endswith('xhtml'):
+ f = codecs.open(file, 'r', 'Windows-1251')
+ a = f.read()
+ f.close()
+ r = re.search('(.*)', a)
+ if r:
+ f1.write(r.group(1) + '\t' + str(len(re.findall('', a))) + '\n')
+ print('1')
+f1.close()
+import os
def func1():
    """Count directories under the current directory by first character.

    Walks ``'.'`` recursively and returns a dict mapping the first character
    of each directory name to the number of directories starting with it.
    """
    first_letters = {}
    for _root, subdirs, _files in os.walk('.'):
        for name in subdirs:
            initial = name[0]
            first_letters[initial] = first_letters.get(initial, 0) + 1
    return first_letters
def func2(freqdict):
    """Print the key of ``freqdict`` with the highest count.

    On ties the key that comes first in iteration order wins.  An empty
    dictionary (or one with no positive count) prints nothing; the original
    code failed there because the winning key was never assigned.
    Returns None.
    """
    if not freqdict:
        return
    a = max(freqdict, key=freqdict.get)
    if freqdict[a] <= 0:
        return
    print('название большинства папок начинается на ' + a)
    return
+func2(func1())
+import random
def length(string):
    """Return a mask of dots with one ``'.'`` per character of ``string``."""
    return '.' * len(string)
def create_arr_and_dic():
    """Load ``Слова и подсказки.csv`` into a list and a lookup dict.

    Each line is split on ``';'``.  The first field (BOM stripped) is
    appended to the list and used as a dict key; the second field (newline
    stripped) is the dict value.  Lines with fewer than two fields, such as
    blank lines, are skipped instead of raising IndexError.

    Returns:
        tuple: ``(arr, dic)``.
    """
    arr = []
    dic = {}
    # The context manager closes the file; the original never closed it.
    with open("Слова и подсказки.csv", encoding="utf-8") as f:
        for line in f:
            words = line.split(';')
            if len(words) < 2:
                continue
            x = words[0].strip('\ufeff')
            arr.append(x)
            dic[x] = words[1].strip('\n')
    return arr, dic
+array, dictionary = create_arr_and_dic()
+y = random.choice(array)
+print('Вот ваша подсказка:', y, length(y))
+z = input('Загаданное слово: ')
+if z == dictionary[y]:
+ print('Правильно.')
+else:
+ print('Увы, нет:(')
+import re
def func1():
    """Map every sentence of ``тестовый файл.txt`` to its word lengths.

    Each line has its sentence terminators (``.``, ``..``, ``...``, ``?``,
    ``!``, plus an optional newline and space) normalised to a single ``.``
    and is then split on ``.``.  Every non-empty sentence becomes a key whose
    value is a dict ``{word: len(word)}`` built from the space-separated,
    stripped words.
    """
    d = {}
    # Raw string: the original non-raw pattern relied on invalid escapes.
    terminator = r'(\.\.?\.?|\?|!)(\n)? ?'
    with open('тестовый файл.txt', 'r', encoding='utf-8') as f:
        for line in f:
            normalised = re.sub(terminator, '.', line)
            for sentence in normalised.split('.'):
                if len(sentence) >= 1:
                    d[sentence] = {word.strip(): len(word.strip())
                                   for word in sentence.split(' ')}
    return d
+print(func1())
+word=input('Введите слово ')
+anotherword=''
+sameword=word
+print(word)
+for i in range(len(word)-1):
+ anotherword=word[len(word)-i-1]
+ for k in range(len(sameword)-1):
+ anotherword+=sameword[k]
+ print(anotherword)
+ sameword=anotherword
+f=open('text.txt', encoding="utf-8")
+a=f.readlines()
+z=0
+x=0
+m=0
+
+n=0
+for line in a:
+ words=line.split()
+ for z in range(len(words)):
+ if words[z].endswith('.') or words[z].endswith(','):
+ x+=1
+ m+=x
+ x=0
+ n+=len(words)
+print ((n-m)/n*100, '% слов в этом тексте не оканчиваются точкой или запятой')
+import random
def actor3():
    """Return a random, capitalized word from ``actor3.txt``.

    The file is read as whitespace-separated words.  The context manager
    closes the handle, which the original left open.
    """
    with open('actor3.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova).capitalize()
def adj2():
    """Return a random word from ``adj2.txt``.

    The file is read as whitespace-separated words.  The context manager
    closes the handle, which the original left open.
    """
    with open('adj2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova)
def line1(noun, adjective):
    """Return the first haiku line: ``noun`` and ``adjective`` joined by a space."""
    return ' '.join((noun, adjective))
def adverb2():
    """Return a random, capitalized word from ``adverb2.txt``.

    The file is read as whitespace-separated words.  The context manager
    closes the handle, which the original left open.
    """
    with open('adverb2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova).capitalize()
def verb2():
    """Return a random word from ``verb2.txt``.

    The file is read as whitespace-separated words.  The context manager
    closes the handle, which the original left open.
    """
    with open('verb2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova)
def place2():
    """Return a random word from ``place2.txt``.

    The file is read as whitespace-separated words.  The context manager
    closes the handle, which the original left open.
    """
    with open('place2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova)
def line2(adverb, verb, place):
    """Return the second haiku line: ``"<adverb> <verb> di <place>."``."""
    return ' '.join((adverb, verb, 'di', place)) + '.'
def actor2():
    """Return a random, capitalized word from ``actor2.txt``.

    The file is read as whitespace-separated words.  The context manager
    closes the handle, which the original left open.
    """
    with open('actor2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova).capitalize()
def verb3():
    """Return a random word from ``verb3.txt``.

    The file is read as whitespace-separated words.  The context manager
    closes the handle, which the original left open.
    """
    with open('verb3.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova)
def line3(noun, verb):
    """Return the third haiku line: ``"<noun> <verb>."``."""
    return ' '.join((noun, verb)) + '.'
def randomhaiku():
    """Assemble a three-line haiku from randomly chosen words.

    The word pickers are called in the same order as the lines are built:
    first line, then second, then third.
    """
    verses = [
        line1(actor3(), adj2()),
        line2(adverb2(), verb2(), place2()),
        line3(actor2(), verb3()),
    ]
    return '\n'.join(verses)
+print (randomhaiku())
+arr=[]
+s=input('Ввведите латинское слово ')
+if len(s)!=0:
+ arr.append(s)
+while len(s)!=0:
+ s=input('Ввведите латинское слово ')
+ if s.endswith ('re') or s.endswith ('i')or s.endswith ('isse') \
+ or s.endswith ('us esse') or s.endswith ('a esse') or s.endswith ('um esse') \
+ or s.endswith ('um iri'):
+ arr.append(s)
+for i in range (len(arr)):
+ print (arr[i])
+N=int(input('Введите число '))
+x=1
+while x!=N:
+ word=input('Введите слово ')
+ x+=1
+ if word=='программирование':
+ break
+print('Работа программы завершена')
+import re
def func1():
    """Return the lower-cased tokens of ``Текст с глаголом выпить.txt``.

    Every whitespace-separated token is lower-cased and has the characters
    ``. , ! ? / \\ | ( ) " ; :`` stripped from both ends.
    """
    # Raw string keeps the backslash literal; the original used an invalid
    # ``\|`` escape that only worked by accident.
    punctuation = r'.,!?/\|()";:'
    arr = []
    with open("Текст с глаголом выпить.txt", encoding="utf-8") as f:
        for line in f:
            for token in line.split():
                arr.append(token.lower().strip(punctuation))
    return arr
+arr1 = []
+i = 0
+for i in range(len(func1())):
+ if re.search('вып((ей(те)?)|(ь(е((шь)|м|те?)|ют?))|(и((л(а|о|и)?)|(т(ь?|(ы(й|ми?|х|е))\
+|(ая?)|(о(е|(го)|й|му?)?)|(ую))))|в(ш((ая)|(ую)|и(й|ми?|х)|е(е|ю|му?)))?))', func1()[i]):
+ if func1()[i] not in arr1:
+ arr1.append(func1()[i])
+ print(func1()[i])
# Read three integers and report whether a + b == c and whether a / b == c.
a = int(input('Введите число а'))
b = int(input('Введите число b'))
c = int(input('Введите число c'))
if a + b == c:
    print('a и b в сумме дают c')
else:
    print('a и b в сумме НЕ дают c')
# Division by zero is undefined, so it can never equal c; guarding here
# avoids the ZeroDivisionError the original raised for b == 0.
if b != 0 and a / b == c:
    print('a разделить на b равно c')
else:
    print('a разделить на b НЕ равно c')
def func1(text_file):
    """Count words ending in ``-ed`` and, among them, in ``-ied``.

    Args:
        text_file: path of a UTF-8 text file.

    Returns:
        list: ``[ed_count, ied_count]``.

    Surrounding punctuation is stripped from each token before the suffix
    test, so ``"walked,"`` and ``"tried."`` are counted; the original
    compared the raw token and missed them.
    """
    ed = 0
    y = 0
    with open(text_file, encoding="utf-8") as f:
        for line in f:
            for token in line.split():
                word = token.strip('.,!?;:"\'()')
                if word.endswith('ed'):
                    ed += 1
                    if word.endswith('ied'):
                        y += 1
    return [ed, y]
+a = input('Введите название файла, который хотите открыть: ')
+print('Количество форм на -ed в тексте: ', func1(a)[0], \
+'\nИз них образованы от глаголов на -y: ', func1(a)[1])
+import re
+def func1():
+ f = open("Ферма, Пьер — Википедия.html", encoding = "utf-8")
+ a = f.readlines()
+ i = 0
+ for i in range(len(a)):
+ r1 = re.search("Научная сфера: | ", a[i])
+ r2 = re.search("", a[i+1])
+ if r1 and r2:
+ r = re.search("( \
+.* )", \
+ a[i+2])
+ break
+ f.close()
+ return r
+def func2():
+ if func1():
+ title = func1().group(2)
+ else:
+ print ('что-то пошло не так')
+ return title
+f = open("text_wiki.txt", 'w', encoding = "utf-8")
+f.write(func2())
+f.close()
+f = open("text_wiki.txt", encoding = "utf-8")
+a = f.readlines()
+for line in a:
+ print(line)
+import os
+alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ. '
+def func1():
+ number = 0
+ arr1 = []
+ for i in os.listdir('.'):
+ if os.path.isfile(i):
+ j = 0
+ check1 = True
+ check2 = 0
+ for j in range(len(i)):
+ if i[j] not in alphabet:
+ check1 = False
+ if i[j] == '.':
+ check2 += 1
+ if check1 == True and check2 <= 1:
+ number += 1
+ arr1.append(i)
+ print('Найдено файлов, название которых состоит \
+только из латинских символов: ' + str(number))
+ return arr1
def func2(arr):
    """Print each distinct file-name stem in ``arr`` once, in first-seen order.

    The stem is the part of the name before the first ``'.'``.  A name
    without a dot is used whole; the original sliced with the ``-1`` that
    ``str.find`` returns and so dropped its last character.
    Returns None.
    """
    arr2 = []
    for filename in arr:
        stem = filename.partition('.')[0]
        if stem not in arr2:
            arr2.append(stem)
    for stem in arr2:
        print(stem)
    return
+func2(func1())
+import re
+def func1():
+ f1 = open("Философия -- Википедия.txt", 'r', encoding = "utf-8")
+ change1 = re.sub('Филос(о́|о)фи(я(х|ми?)?|и|е?й|ю)', 'Астрол\\1ги\\2', f1.read())
+ change2 = re.sub('философи(я(х|ми?)?|и|е?й|ю)', 'астрологи\\1', change1)
+ f1.close()
+ return change2
+def func2():
+ f2 = open("Астрология.txt", 'w', encoding = "utf-8")
+ f2.write(func1())
+ f2.close()
+ return True
+func2()
+import re
+def func1():
+ arr = []
+ i = 0
+ f = open("Космическая программа Китая.txt", encoding = "utf-8")
+ a = f.readlines()
+ for line in a:
+ arr.append(line)
+ f.close()
+ return arr
+i = 0
+arr1 = []
+for i in range(len(func1())):
+ res = re.findall('«[А-Яа-я ]*-[1-9]»', func1()[i])
+ j = 0
+ for j in range(len(res)):
+ if res[j] not in arr1:
+ arr1.append(res[j])
+ print (res[j])
+word=input('Введите русское существительное первого склонения')
+if word.endswith('а') or word.endswith ('я'):
+ print ('Именительный падеж, единственное число')
+elif word.endswith ('ами') or word.endswith ('ями'):
+ print ('Творительный падеж, множественное число')
+elif word.endswith('ы') or word.endswith ('и'):
+ print ('Родительный падеж, единственное число или именительный или винительный падеж, множественное число')
+elif word.endswith('е'):
+ print ('Дательный или предложный падеж, единственное число')
+elif word.endswith('ой') or word.endswith ('ою') or word.endswith('ёй') or word.endswith ('ёю')or word.endswith ('ею') or word.endswith ('ей'):
+ print ('Творительный падеж, единственное число')
+elif word.endswith('у') or word.endswith('ю'):
+ print ('Винительный падеж, единственное число')
+elif word.endswith ('ам') or word.endswith ('ям'):
+ print ('Дательный падеж, множественное число')
+elif word.endswith ('ах') or word.endswith('ях'):
+ print ('Предложный падеж, множественное число')
+else:
+ print ('Родительный или винительный падеж, множественное число')
+import re
+def func3b(string):
+ r = re.match('([А-Яа-я][a-я]*)= 10:
+ print (word, freqdict(func1())[word])
+a=9
+a=int(a)
+s=input('Введите число')
+s=int(s)
+if a==s:
+ print('Позравляю, вы угадали')
+else:
+ if a>s:
+ print('Загаданное число больше')
+ if a.+', line)
+ w_sum += len(arr1)
+ arr2 = re.findall('ana', line)
+ ana_sum += len(arr2)
+ print (str(ana_sum/w_sum))
+ return
+func1()
+def func1():
+ arr = []
+ i = 0
+ f = open("1.txt", encoding = "utf-8")
+ a = f.readlines()
+ for line in a:
+ words = line.split()
+ for i in range(len(words)):
+ words[i] = words[i].lower()
+ words[i] = words[i].strip('.,!?/\|()";:')
+ arr.append(words[i])
+ f.close()
+ return arr
def func2(x, arr):
    """Return the words of ``arr`` containing exactly ``x`` vowels.

    Only the lower-case Russian vowels ``аяоёуюэеыи`` are counted.
    """
    glasnye = 'аяоёуюэеыи'
    return [
        slovo for slovo in arr
        if sum(1 for bukva in slovo if bukva in glasnye) == x
    ]
def func3(bukva, arr):
    """Return the words of ``arr`` whose first character equals ``bukva``.

    Empty strings are skipped rather than raising IndexError; they can
    occur when a token consists only of stripped punctuation.
    """
    return [word for word in arr if word and word[0] == bukva]
+y = input('Введите букву русского алфавита ')
+print(func3(y,func1()))
+def func1():
+ arr = []
+ i = 0
+ f = open("1.txt", 'r', encoding = "utf-8")
+ a = f.readlines()
+ for line in a:
+ words = line.split()
+ for i in range(len(words)):
+ words[i] = words[i].lower()
+ arr.append(words[i].strip(',.()«»!'))
+ f.close()
+ arr.sort()
+ return arr
def freqdict(arr):
    """Return a dict mapping each item of ``arr`` to its number of occurrences."""
    tally = {}
    for token in arr:
        tally[token] = tally.get(token, 0) + 1
    return tally
+f1 = open("2.tsv", 'w', encoding = "utf-8")
+for j in sorted(freqdict((func1()))):
+ f1.write(j)
+ f1.write('\t')
+ f1.write(str(freqdict(func1())[j]))
+ f1.write('\n')
+f1.close()
+alphabet = 'abcdefghijklmnopqrstuvwxyzабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
+alphabet = list(alphabet)
+def freqdict1(arr):
+ letter_count = {}
+ for letter in alphabet:
+ letter_count[letter] = 0
+ for word in arr:
+ if word.startswith(letter):
+ letter_count[letter] += 1
+ return letter_count
+f2 = open("3.tsv", 'w', encoding = "utf-8")
+for k in sorted(freqdict1(func1())):
+ f2.write(k)
+ f2.write('\t')
+ f2.write(str(freqdict1(func1())[k]))
+ f2.write('\n')
+f2.close()
+a=9
+a=int(a)
+s=input('Введите число ')
+if len(s)==0:
+ print ('Game over')
+s=int(s)
+while a!=s:
+ if a>s:
+ print('Загаданное число больше')
+ if a.+", line)
+ if s1:
+ if s1.group(1) not in freqdict:
+ freqdict[s1.group(1)] = 1
+ else:
+ freqdict[s1.group(1)] += 1
+ return freqdict
+def func2():
+ f3 = open("Ключи.txt", 'w', encoding = "utf-8")
+ for i in func1():
+ f3.write(i)
+ f3.write('\n')
+ f3.close()
+ return True
+func2()
+def func3():
+ f4 = open("Прилагательные.txt", 'w', encoding = "utf-8")
+ for i in func1():
+ s2 = re.search("l.f...", i)
+ if s2:
+ f4.write(i)
+ f4.write(' ')
+ f4.write(str(func1()[i]))
+ f4.write('\n')
+ f4.close()
+ return True
+func3()
+def func4():
+ f5 = open("Внутри тега body.txt", 'r', encoding = "utf-8")
+ change1 = re.sub("(.+)", "\\1 \\2 \\3", f5.read())
+ change2 = re.sub("<.*>", ' ', change1)
+ f5.close()
+ return change2
+import os
+import re
+def preprocessing():
+ all_meta = []
+ w = open('results.txt', 'w', encoding = 'utf-8')
+ for el in os.listdir('news'):
+ with open(os.path.join('news',el), 'r', encoding = 'Windows-1251') as f:
+ article = f.read()
+
+ sentences = re.findall(r'', article)
+ template = '{} {}\n'
+ w.write(template.format(el, len(sentences)))
+
+ author = re.findall(r'', article)
+ authorstr = author[0]
+ authorstr = re.sub('', '', authorstr)
+ topic = re.findall(r'', article)
+ topicstr = topic[0]
+ topicstr = re.sub('', '', topicstr)
+ meta = el+','+authorstr+','+topicstr+'\n'
+ all_meta.append(meta)
+
+ words = []
+ wordsraw = re.findall('.+', article)
+ for el in wordsraw:
+ wordsrawstr = el
+ wordsrawstr = re.sub('', '', wordsrawstr)
+ wordsrawstr = re.sub('', '', wordsrawstr)
+ wordsrawstr = re.sub('`', '', wordsrawstr)
+ wordsrawstr = wordsrawstr.lower()
+ words.append(wordsrawstr)
+ bigrams = []
+ for ind in range(1, len(words) - 1):
+ bigrams.append(' '.join([words[ind - 1], words[ind]]))
+ w.close()
+ return bigrams, all_meta
def data(all_meta):
    """Write ``metadata.csv``: a header row followed by each item of ``all_meta``.

    Items are written verbatim, so each is expected to carry its own
    trailing newline.  The context manager closes the file even if a write
    fails.
    """
    with open('metadata.csv', 'w', encoding='utf-8') as w:
        w.write('Название файла,Автор,Тематика текста\n')
        w.writelines(all_meta)
def bigram_processing(bigrams):
    """Write matching bigrams to ``bigrams_res.txt``, one per line.

    A bigram is kept when it starts with one of the prepositions
    ``в/на/о/об/обо/при/по`` followed by a space and the rest contains,
    after at least one character, one of ``е/и/ах/ях``.  The pattern is
    anchored only at the start.
    """
    pattern = re.compile(r'(в|на|о|об|обо|при|по) .+(е|и|ах|ях)')
    # Context manager closes the file even if a write raises.
    with open('bigrams_res.txt', 'w', encoding='utf-8') as w:
        for el in bigrams:
            if pattern.match(el) is not None:
                w.write(el + '\n')
+bigrams, all_meta = preprocessing()
+bigram_processing(bigrams)
+data(all_meta)
+w=input('Введите слово: ')
+while w == '':
+ w=input('Попробуйте еще раз: ')
+border = 1
+for i in range (len(w) // 2):
+ print (w[border:len(w) - border])
+ border += 1
+import re
def print_forms():
    """Print each distinct form of the verb "сидеть" found in ``rudin.txt``.

    Tokens are lower-cased and stripped of surrounding punctuation before
    matching.  The original called ``word.lower()`` and ``word.strip(...)``
    but discarded their results, so capitalised forms were missed and
    punctuation stayed attached to the printed words.
    """
    pattern = re.compile(
        'си(жу|д(е(ть|в((ши)?й?)?|л(а|о|и)?)|и(те?|м|шь)?|я(т|щий)?))')
    with open('rudin.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    sit_arr = []
    for word in text.split():
        word = word.lower().strip(',...!?-–— :,')
        # match() anchors at the start only, as in the original.
        if pattern.match(word) is not None and word not in sit_arr:
            sit_arr.append(word)
    for el in sit_arr:
        print(el)
+print_forms()
+import random
def create_dict():
    """Load riddles from ``db.txt``.

    Each non-blank line is split on whitespace.  The last word is the answer
    and the preceding words form its clue.

    Returns:
        tuple: ``(clues, keys)`` where ``clues`` maps answer to a list of
        clue words and ``keys`` lists the answers in file order.

    Blank lines are skipped.  That includes the empty string left after a
    trailing newline, which made the original raise IndexError.
    """
    with open('db.txt', 'r', encoding='utf-8') as f:
        db = f.read()
    clues = dict()
    keys = []
    for phrase in db.split('\n'):
        phrase_split = phrase.split()
        if not phrase_split:
            continue
        answer = phrase_split[-1]
        clues[answer] = phrase_split[:-1]
        keys.append(answer)
    return clues, keys
def show(clues, keys, shown):
    """Show one random clue, read a guess, and report whether it is right.

    Args:
        clues: dict mapping answer to a list of clue words.
        keys: list of answers to choose from.
        shown: answers that were already guessed and should be avoided.

    Returns:
        tuple: ``(check, key)``, where ``check`` is True when the
        lower-cased guess equals ``key``.

    An answer not yet in ``shown`` is preferred.  When every answer has been
    shown, an arbitrary one is repeated; the original retried forever in
    that case.
    """
    remaining = [candidate for candidate in keys if candidate not in shown]
    key = random.choice(remaining or keys)
    for el in clues[key]:
        print(el, end=' ')
    guess = input()
    check = guess.lower() == key
    return check, key
def result(check):
    """Print a random congratulation if ``check == True``, else a consolation."""
    congrats = ['Поздравляю!', 'horoshego dnya!', 'Угадали!', 'Верно!', 'Хорошо сработано!']
    condolences = ['Попробуйте еще раз!', 'Не отчаивайтесь, продолжайте!', 'Почти в точку... у вас есть еще попытка!', 'nichego, zavtra otgadaete!', 'escho chut-chut...']
    pool = congrats if check == True else condolences
    print(random.choice(pool))
+def run():
+ shown = []
+ for i in range (10):
+ clues, keys = create_dict()
+ check, key = show(clues, keys, shown)
+ result(check)
+ if check == True:
+ shown.append(key)
+run()
+print('Всего доброго!')
+import re
def change():
    """Rewrite ``aves.txt`` into ``fish.txt``, turning "птиц-" words into "рыб-".

    The instrumental forms ``птицей``/``птицею`` become ``рыбой``/``рыбою``;
    every other word starting with ``птиц``/``Птиц`` has that stem replaced
    by ``рыб``/``Рыб``.

    Two defects of the original are fixed:
    * ``'.'`` in the replacement is literal, so a dot was inserted in place
      of the matched ending; the ending is now captured and re-emitted.
    * ``\\b`` inside a replacement template is a backspace character, so a
      control character was written before every "рыб".
    """
    with open('aves.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    text = re.sub(r'\bптице([йю])\b', r'рыбо\1', text)
    text = re.sub(r'\bПтице([йю])\b', r'Рыбо\1', text)
    text = re.sub(r'\bптиц', 'рыб', text)
    text = re.sub(r'\bПтиц', 'Рыб', text)
    with open('fish.txt', 'w', encoding='utf-8') as f:
        f.write(text)
    print('Текст записан в файл fish.txt')
+change()
+import re
+def search():
+ with open('chuvash.html', 'r', encoding = 'utf-8') as f:
+ source = f.read()
+ search_arr = source.split('', el)
+ codearr = re.split('', el[1])
+ result = codearr[0]
+ return result
def record(result):
    """Write ``result`` to ``blank.txt`` (UTF-8) and print a confirmation."""
    with open('blank.txt', 'w', encoding='utf-8') as out:
        out.write(result)
    print('Трехбуквенный код языка записан в файл blank.txt')
+result = search()
+record(result)
+print('Введите число')
+n=float(input())
+while n<2:
+ print ('Отсутствует степени 2, не превышающие', n, 'Пожалуйста, введите число не меньше двух.')
+ n=float(input())
+d=2
+print ('Степени числа 2, не превышающие', n, ':', end=' ')
+while d<=n:
+ print(d, end=' ')
+ d*=2
def opentext(title):
    """Return the normalised words of the UTF-8 text file ``title``.

    Each whitespace-separated token is lower-cased and stripped of the
    characters ``! - . / ? " ,`` and space at both ends.  Tokens that become
    empty are dropped.

    The original called ``elem.lower()`` and ``elem.strip(...)`` without
    storing the results, so it returned the raw tokens unchanged.
    """
    with open(title, 'r', encoding='utf-8') as f:
        text = f.read()
    arr = []
    for elem in text.split():
        cleaned = elem.lower().strip('!-./?"", ')
        if cleaned:
            arr.append(cleaned)
    return arr
def firstletter(letter, arr):
    """Return the items of ``arr`` that start with the prefix ``letter``.

    The original compared ``letter`` with ``elem[0:2]`` and so only worked
    for two-character prefixes; ``str.startswith`` handles any length and
    gives the same result for two-character ones.
    """
    return [elem for elem in arr if elem.startswith(letter)]
+def questions():
+ file_name = input('Введите путь к файлу: ')
+ minlen = int(input('Введите минимальную длину слова: '))
+ arr = opentext(file_name)
+ un_words = firstletter('un', arr)
+ return minlen, un_words
def count(minlen, un_words):
    """Print how many ``un_words`` there are and the share longer than ``minlen``.

    Args:
        minlen: length threshold; a word counts when ``len(word) > minlen``.
        un_words: list of words.

    With an empty ``un_words`` the percentage is reported as ``0.0``; the
    original raised ZeroDivisionError.
    """
    wordslen = [elem for elem in un_words if len(elem) > minlen]
    percent = len(wordslen) / len(un_words) * 100 if un_words else 0.0
    print('Количество слов, начинающихся с un:', len(un_words))
    print('Процент слов длинее', minlen, ':', percent)
+minlen, un_words = questions()
+count(minlen, un_words)
+import os
+import shutil
+import re
def countfolders():
    """Print the folders in the current directory whose names mix scripts.

    A folder qualifies when its name contains at least one Latin letter and
    at least one Cyrillic letter.  Each match is printed, then the total.

    The original wrote ``r'...' and r'...' in folder``.  That tested whether
    the literal pattern text was a substring of the name, with the first
    operand always truthy, so it never applied the regular expressions.
    """
    result = []
    for folder in os.listdir():
        if not os.path.isdir(folder):
            continue
        has_latin = re.search(r'[a-zA-Z]', folder)
        has_cyrillic = re.search(r'[а-яёА-ЯЁ]', folder)
        if has_latin and has_cyrillic:
            result.append(folder)
            print(folder)
    print('Всего папок, удовлетворяющих условию:', len(result))
+countfolders()
+import os
def mostfiles():
    """Print the directory under ``'.'`` that directly holds the most files.

    On ties the directory met first during the walk wins.  If no directory
    contains any file, an empty path and a count of 0 are printed.
    """
    counts = {root: len(files) for root, _dirs, files in os.walk('.')}
    folder, c = '', 0
    if counts:
        top = max(counts, key=counts.get)
        if counts[top] > 0:
            folder, c = top, counts[top]
    print('Количество файлов в папке по адресу', folder, ':', c)
+mostfiles()
# Count the lines of exomars.txt and how many of them contain more than five
# words, where a stand-alone em dash is not counted as a word.
countline = 0
countall = 0
with open('exomars.txt', 'r', encoding='utf-8') as text:
    for line in text:
        countall += 1
        arr = line.split()
        # Count dashes per line.  The original never reset this counter, so
        # dashes from earlier lines were subtracted from later ones too.
        symb = sum(1 for el in arr if el == '—')
        countwords = len(arr) - symb
        if countwords > 5:
            countline += 1
# Guard the empty-file case, which used to raise ZeroDivisionError.
percent = round(countline * 100 / countall) if countall else 0
print('Всего строк:', countall,'Строк с числом слов больше 5:', countline, 'Процент:', percent, '%')
+arr = []
+arr1 = []
+i = 0
+print('Пожалуйста, введите 8 слов')
+while i != 8:
+ word = input()
+ arr.append(word)
+ i += 1
+i = 0
+while i <= 6:
+ pair = arr[i] + arr[i+1]
+ arr1.append(pair)
+ i += 2
+for el in arr1:
+ print (el)
+import random
def adj():
    """Return a random line of ``esenin_adj_pl.txt``, capitalized and stripped.

    The context manager closes the file even if reading fails.
    """
    with open('esenin_adj_pl.txt', 'r', encoding='utf-8') as contadj:
        adj_arr = [line.capitalize().strip() for line in contadj]
    return random.choice(adj_arr)
def noun():
    """Return a random line of ``spi_noun_pl.txt``, lower-cased and stripped.

    The context manager closes the file even if reading fails.
    """
    with open('spi_noun_pl.txt', 'r', encoding='utf-8') as contnoun:
        nouns = [line.lower().strip() for line in contnoun]
    return random.choice(nouns)
def verb():
    """Return a random line of ``majakovsky_verbs.txt``, capitalized and stripped.

    The context manager closes the file even if reading fails.
    """
    with open('majakovsky_verbs.txt', 'r', encoding='utf-8') as contverbs:
        verbs = [line.capitalize().strip() for line in contverbs]
    return random.choice(verbs)
def adv():
    """Return a random line of ``pushkin_adverbs.txt``, lower-cased and stripped.

    The context manager closes the file even if reading fails.
    """
    with open('pushkin_adverbs.txt', 'r', encoding='utf-8') as contadverb:
        adverbs = [line.lower().strip() for line in contadverb]
    return random.choice(adverbs)
def prop():
    """Return a random line of ``properties.txt``, lower-cased and stripped.

    The context manager closes the file even if reading fails.
    """
    with open('properties.txt', 'r', encoding='utf-8') as contprop:
        props = [line.lower().strip() for line in contprop]
    return random.choice(props)
+def line1():
+ syll = 0
+ while syll != 5:
+ syll = 0
+ first = adj() + ' ' + noun()
+ for letter in first:
+ if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя':
+ syll += 1
+ return first
+def line2():
+ syll = 0
+ while syll != 7:
+ syll = 0
+ second = verb() + ' ' + adv() + ' ' + adv() + random.choice(['!','?','.','...'])
+ for letter in second:
+ if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя':
+ syll += 1
+ return second
+def line4():
+ syll = 0
+ while syll != 7:
+ syll = 0
+ fourth = verb() + ' ' + adv() + random.choice(['!','?','.','...'])
+ for letter in fourth:
+ if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя':
+ syll += 1
+ return fourth
+def line5():
+ syll = 0
+ person=['Я','Ты']
+ while syll != 7:
+ syll = 0
+ fifth = random.choice(person) + ' ' + prop() + ' ' + adv() + random.choice(['!','?','.','...'])
+ for letter in fifth:
+ if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя':
+ syll += 1
+ return fifth
+print(line1())
+print(line2())
+print(line1())
+print(line4())
+print(line5())
+print('Введите три числа')
+a,b,c=float(input()), float(input()), float(input())
+div=a/b
+deg=a**b
+if div==c:
+ print ('Результат деления А на B равен С')
+else:
+ print ('Результат деления А на B НЕ равен С')
+if deg==c:
+ print ('А в степени B равно С')
+else:
+ print ('А в степени B НЕ равно С')
+import re
+def preproc():
+ with open('text.txt', 'r', encoding = 'utf-8') as f:
+ text = f.read()
+ allsent = re.split(r'[\.\?\!]', text)
+ allsent = [sent.lower() for sent in allsent]
+ allsent = [re.sub(r'[,—“\':”\(\)]', '', sent) for sent in allsent]
+ return allsent
def count(sent):
    """Return the words that occur more than once in ``sent`` with their counts.

    Keys appear in order of first occurrence.  When nothing repeats, the
    placeholder ``{'Повторяющихся слов': '0'}`` is returned instead.
    """
    repeated = {}
    for word in sent:
        occurrences = sent.count(word)
        if occurrences > 1:
            repeated[word] = occurrences
    if not repeated:
        repeated = {'Повторяющихся слов': '0'}
    return repeated
+def display(several):
+ print('Следующее предложение: ')
+ template = '{:^10} {:^10}'
+ for keyword in several:
+ print(template.format(keyword, several[keyword]))
+allsent = preproc()
+for sentence in allsent:
+ arr = re.split(r' ', sentence)
+ several = count(arr)
+ display(several)
+import re
+def opencount():
+ with open('corp.xml', 'r', encoding = 'utf-8') as f:
+ text = f.readlines()
+ c = 0
+ for line in text:
+ line.strip('\s')
+ if '' not in line:
+ c += 1
+ else:
+ break
+ numheader = str(c) + '\n'
+ return numheader, text
+def create_dict(text):
+ newdict = {}
+ typearr = []
+ allmorphs = []
+ for line in text:
+ if '')
+ allmorphs.append(morph1[0])
+ keys = []
+ for el in allmorphs:
+ if el not in keys:
+ keys.append(el)
+ for key in keys:
+ num = allmorphs.count(key)
+ newdict[key] = num
+ return newdict
+def writenum(c, newdict, neutrum, csvarr):
+ with open('result.txt', 'w', encoding = 'utf-8') as f:
+ f.write(c)
+ for key, freq in newdict.items():
+ string = str(key) + ':' + str(freq) + '\n'
+ f.write(string)
+ line = ''
+ for el in neutrum:
+ line += el + ', '
+ line += '\n'
+ f.write(line)
+ for el in csvarr:
+ f.write(el)
+ print('Записано.')
+def search_pro_n(text):
+ neutrum = []
+ for line in text:
+ q = re.search('type="f.h', line)
+ if q != None:
+ form1 = line.split('">')
+ form2 = form1[1].split('')
+ neutrum.append(form2[0])
+ return neutrum
+def wholecorpora():
+ csvarr = []
+ with open('corp.xml', 'r', encoding = 'utf-8') as f:
+ text = f.read()
+ arr = text.split('')
+ arr1 = arr[1].split('')
+ arrlines = arr1[0].split('\n')
+ for line in arrlines:
+ if '', ', ', line)
+ line = re.sub('', '\n', line)
+ csvarr.append(line)
+ return csvarr
+c, text = opencount()
+newdict = create_dict(text)
+neutrum = search_pro_n(text)
+csvarr = wholecorpora()
+writenum(c, newdict, neutrum, csvarr)
+
+file = open ("цитаты1.txt", "r", encoding = "utf-8")
+stroki = 0
+for line in file:
+ arr = line.split('—')
+ ar = arr[0].split()
+ if len(arr) > 0:
+ if len(ar) < 10:
+ print (arr[0])
+
+
+
+a = float(input ('Введите a:'))
+b = float(input ('Введите b:'))
+c = float(input ('Введите c:'))
+U1 = U4 = False
+if a * b == c:
+ U1 = True
+ print ('Выполняется условие 1')
+if a * c + b == 0:
+ U4 = True
+ print ('Выполняется условие 4')
+if U1 and U4:
+ print ('Выполняются условия 1 и 4')
+else:
+ if U1 == False and U4 == False:
+ {
+ print ('НЕ выполняется ни одно из условий 1 или 4')
+ }
+print ('Для завешения нажмите ENTER')
+ENTER = input('')
+import re
+def get_text(fn):
+ a = []
+ with open(fn, 'r', encoding = "utf-8") as f:
+ for line in f:
+ a.append(line)
+ return a
+def main():
+ text = get_text('Санкт-Петербург — Википедия.html')
+ reg = ']*?>(UTC[+-]?\d{1,2}:?\d{0,2})'
+ for ti in text:
+ m = re.search(reg, ti)
+ if m != None:
+ print(m.group(1))
+ return m.group(1)
+def record():
+ r = main()
+ f = open("result.txt","w")
+ f.write("Часовой пояс - " + r)
+ f.close()
+record()
+def open_text():
+
+ with open('Austen Jane. Pride and Prejudice.txt', "r", encoding = "utf-8") as f:
+ text = f.read()
+ text = text.lower()
+ arr = text.split()
+ for i, w in enumerate(arr):
+ arr[i] = arr[i].strip('.,!?-;:“"”''')
+ return arr
+
def isness(word):
    """Return 1 if ``word`` is longer than four characters and ends in ``ness``, else 0."""
    return 1 if len(word) > 4 and word[-4:] == 'ness' else 0
def AddInList(word, List, Qn):
    """Record one occurrence of ``word`` in the parallel lists ``List``/``Qn``.

    Every position of ``List`` equal to ``word`` gets its counter in ``Qn``
    incremented.  If there is none, ``word`` is appended with a count of 1.
    Both lists are modified in place; nothing is returned.
    """
    found = False
    for idx, known in enumerate(List):
        if known == word:
            Qn[idx] += 1
            found = True
    if not found:
        List.append(word)
        Qn.append(1)
+Inarr = open_text()
+List = list()
+Qn = list()
+for i in range (len(Inarr)):
+ if isness(Inarr[i]) == 1:
+ AddInList(Inarr[i], List, Qn)
+print('Количество разных сущ. с суффиксом -ness равно: ' + str(len(List)))
+Max = 0
+Ind = 0
+for i in range(len(List)):
+ if Qn[i] > Max:
+ Ind = i
+ Max = Qn[i]
+print('Максимальную частотность имеет слово: ' + List[Ind] + ', с частотностью: ' + str(Qn[Ind]))
+import os
+import re
def papka():
    """List the regular files in ``'.'`` whose names contain no digit.

    Prints how many there are and returns the list of names.
    """
    folder = []
    for entry in os.listdir('.'):
        if re.search(r'[0-9]+', entry):
            continue
        if os.path.isfile(entry):
            folder.append(entry)
    print(len(folder))
    return folder
+papka()
+def dop():
+ arr = []
+ astr = 0
+ exist = 0
+ folder = [f for f in os.listdir('.')]
+ for p in range(len(folder)):
+ for j in range(len(folder[p])):
+ if folder[p][j] == '.':
+ astr = folder[p][0:j]
+ exist = 0
+ for k in range(len(arr)):
+ if arr[k] == astr:
+ exist = 1
+ if exist == 0:
+ arr.append(astr)
+ return arr
+print(dop())
+word = input ("Введите слово на кириллице:")
+i = 0
+while i < len(word):
+ if word[i] == 'п' or word[i] == 'о' or word[i] == 'е':
+ print (word[i])
+ i = i+2
+print ("Для завершения работы нажмите ENTER")
+ENTER = input ('')
+import re
+def open_text():
+ with open('Викинги — Википедия.html', "r", encoding = "utf-8") as f:
+ text = f.read()
+ return text
def replacement():
    """Return the page text with every form of "викинг" replaced by "бурундук".

    The case ending captured by the pattern is carried over, and lower-case
    and capitalised forms are handled separately.

    The original ended the pattern with ``[^\\w]``, which consumed the
    character after the word and dropped it from the output, and never
    matched a word at the very end of the text.  A negative lookahead checks
    the boundary without consuming anything.
    """
    result1 = re.sub(r'викинг((и|у|е|а(х|м(и)?)?)|о(в|м)?)?(?!\w)', 'бурундук\\1', open_text())
    result2 = re.sub(r'Викинг((и|у|е|а(х|м(и)?)?)|о(в|м)?)?(?!\w)', 'Бурундук\\1', result1)
    return result2
+def record():
+ r = replacement()
+ f = open("result.txt","w", encoding = "utf-8")
+ f.write(r)
+ f.close()
+ return f
+record()
+
+import re
+import os
+import csv
+def first():
+ reg = ' '
+ for i in os.listdir('.'):
+ if i.endswith('.xhtml'):
+ m = []
+ with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t:
+ text = t.read()
+ for t in re.findall(reg, text):
+ m.append(t)
+ with open('new_text.txt', 'a', encoding = 'utf-8') as f:
+ f.write(i+'\t'+str(len(m)) + '\n')
+first()
+
+def second():
+ for i in os.listdir('.'):
+ reg = ''
+ with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t:
+ text = t.read()
+ for t in re.findall(reg, text):
+ if re.search('', text):
+ with open('table.csv', 'a', encoding = 'utf-8') as f:
+ f.write(i+','+re.search('', text))
+second()
+import random
+
def read_words(filename):
    """Return the words listed in the UTF-8 file ``filename``.

    Each line is stripped and split on ``', '``.  Empty entries are skipped;
    in the original a blank line added an empty word to the list.  The
    context manager closes the file even if reading fails.
    """
    arr = []
    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            arr += [word for word in line.strip().split(', ') if word]
    return arr
+def verb(number):
+
+
+ if number == 's':
+ return random.choice(read_words("singular_verbs.txt"))
+ else:
+ return random.choice(read_words("plural_verbs.txt"))
+def noun(number):
+
+
+ if number == 's':
+ return random.choice(read_words("singular_nouns.txt"))
+ else:
+ return random.choice(read_words("plural_nouns.txt"))
+def clinoun():
+
+
+ return random.choice(read_words("clitic_noun.txt"))
+def adverb():
+
+
+ return random.choice(read_words("adverb.txt"))
+def punctuation():
+
+ return random.choice(read_words("punctuation.txt"))
+def verse1():
+
+ return clinoun() + ' ' + noun('s') + ' ' + adverb() + ' ' + verb('s') + punctuation()
+def verse2():
+
+ return noun('pl') + ' ' + verb('pl') + ' ' + adverb() + ' ' + clinoun() + punctuation()
+def verse3():
+
+ return noun('s') + ' ' + adverb() + ' ' + clinoun() + ' ' + verb('s') + punctuation()
+def make_verse():
+
+ verse = random.choice([1,2,3])
+ if verse == 1:
+ return verse1()
+ elif verse == 2:
+ return verse2()
+ else:
+ return verse3()
+for n in range(4):
+ print(make_verse())
+def open_text():
+ with open('green.txt', "r", encoding = "utf-8") as f:
+ text = f.read()
+ arr = text.split('.')
+ return arr
+def deli():
+ txt = open_text()
+ for i, w in enumerate(txt):
+ for s in '.,!?-;:“"”''()«»–':
+ txt[i] = txt[i].replace(s, "")
+ return txt
+def des():
+ txt = deli()
+ dlina = [x for x in txt if len(x.split()) > 10]
+ return dlina
+def big():
+ txt = des()
+ f = []
+ for i in txt:
+ f += [x for x in i.split() if x[0].isupper()]
+ return f
+print (big())
+file = open("text.txt", "r", encoding = "utf-8")
+lmin = lmax = len(file.readline())
+for line in file:
+ lp = len(line)
+ if lp > 0:
+ if lmin > lp:
+ lmin = lp
+ if lmax < lp:
+ lmax = lp
+print (lmax / lmin)
+
+def open_text_1():
+
+ with open('islandcorp.xml', "r", encoding = "utf-8") as f:
+ Line = 0
+ for i in f:
+ if i != '\n':
+ Line += 1
+ else:
+ break
+ return Line
+def record():
+ with open("result1.txt","w", encoding = "utf-8") as f:
+ f.write(str(open_text_1()))
+ return
+import re
+
+def keys():
+
+ with open('islandcorp.xml', "r", encoding = "utf-8") as f:
+ text = f.read()
+ Dic = {}
+ reg = '.*?'
+ res = re.findall(reg, text)
+
+ for i in range(len(res)):
+ if res[i] not in Dic:
+ Dic[res[i]] = 1
+ else:
+ Dic[res[i]] += 1
+
+ return Dic
+def record1():
+ with open("result2.txt","w", encoding = "utf-8") as f:
+ a = keys()
+ for key in a:
+ f.write(key + ',' + str(a[key])+ '\n')
+record()
+record1()
+import os
+import re
def main():
    """Count directories under ``'.'`` whose names use only allowed characters.

    A name qualifies when every character matches ``[а-яёЁ А-Я]``, that is
    Cyrillic letters or a space.  The walk is recursive.
    """
    total = 0
    for _root, dirs, _files in os.walk('.'):
        for name in dirs:
            if all(re.search(r'[а-яёЁ А-Я]+', ch) is not None for ch in name):
                total += 1
    return total
+print(main())
+arr =[]
+word = input("Введите слово: ")
+while word:
+ arr.append(word)
+ word = input ("Введите слово: ")
+w = 0
+for w in range (len(arr)):
+ if len(arr[w]) > 5:
+ print (arr[w])
+print ("Для завершения работы нажмите ENTER")
+ENTER = input ('')
+import re
+def open_text():
+
+ with open('txtfind.txt', "r", encoding = "utf-8") as f:
+ text = f.read()
+ text = text.lower()
+ arr = text.split()
+ for i, w in enumerate(arr):
+ arr[i] = arr[i].strip('.,!?-;:“"”''')
+ return arr
+def find_in_text():
+
+ List = list()
+ regex = '\W?(на(((й((д(у(т(ся)?)?|ёшь(ся)?|ёт(ся|е(сь)?)?|ём(ся)?|и|ите(сь)?|я|енный|ены))|ти(сь)?)))|(ш(ёл(ся)?|л(а|и|о)(сь)?|едш(и|(ий|ая|ее)(ся)?)))))\W?'
+ words = open_text()
+ for i in range (len(words)):
+ m = re.search(regex, words[i])
+ if m != None:
+ List.append(words[i])
+ return List
+uList = list()
+List = find_in_text()
+for i in range(len(List)):
+ Include = 0
+ for j in range(len(uList)):
+ if uList[j] == List[i]:
+ Include = 1
+ if Include == 0:
+ print(List[i])
+ uList.append(List[i])
+word = ('abracadabra')
+i=0
+while i <= len(word):
+ print (word[0:i])
+ i = i+1
+import random
def get_words(fn):
    """Load the riddle dictionary from a comma-separated file.

    Every usable line has the form ``word,collocation[,collocation...]``.
    Inside each collocation the word itself is replaced by as many dots as
    it has letters.

    Args:
        fn: path of the file to read.

    Returns:
        dict mapping each word to the list of its masked collocations.
    """
    words = {}
    with open(fn, 'r') as fd:
        for line in fd:
            # Drop the line terminator so it does not end up glued to the
            # last collocation.
            line = line.rstrip('\r\n')
            # Blank or comma-less lines cannot be unpacked into two parts.
            if ',' not in line:
                continue
            word, collocations = line.split(',', 1)
            words[word] = collocations.replace(word, '.' * len(word)).split(',')
    return words
+def ask_riddle(words_dict):
+
+ words = list(words_dict.keys())
+ rnd_word = random.choice(words)
+ rnd_collocation = random.choice(list(words_dict[rnd_word]))
+ print(rnd_collocation)
+ word = input('Пропущенное слово:')
+ return rnd_word, word == rnd_word
+def main():
+
+ words = get_words('f3.csv')
+ word, answer = ask_riddle(words)
+ print('И это правильный ответ!' if answer else 'Вы ошиблись, правильный ответ: '+ word)
+ return word, answer
+main()
+import re
def main(path="Викинги.html"):
    """Return the text of *path* with forms of "викинг" turned into "бурундук".

    Both the lower-case and the capitalised word are replaced and the
    captured case ending is re-attached to the new stem. The character that
    follows the word is only checked with a lookahead, so spaces and
    punctuation after the word are kept and a word at the very end of the
    text is replaced too.

    Args:
        path: UTF-8 file to read; defaults to the original hard-coded name.

    Returns:
        str: the converted text.
    """
    # "и" may carry a combining acute accent (U+0301).
    stem = "(?:и\u0301|и)кинг"
    ending = r"(а(?:ми?|х)?|о(?:в|м)|у|е|и)?(?!\w)"
    replacements = (
        (re.compile("в" + stem + ending), "бурундук"),
        (re.compile("В" + stem + ending), "Бурундук"),
    )
    converted = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            for pattern, word in replacements:
                # group(1) is None when the word has no ending.
                line = pattern.sub(
                    lambda m, word=word: word + (m.group(1) or ""), line
                )
            converted.append(line)
    return "".join(converted)
+def record():
+ s = main()
+ f = open("results.txt","w",encoding='utf-8')
+ f.write(s)
+ f.close()
+record()
+a=int(input('input a number1: '))
+b=int(input('input a number2: '))
+c=int(input('input a number3: '))
+print('\na=',a,'\nb=',b,'\nc=',c)
+if a*b==c:
+ print('\nПроизведение чисел a и b равно числу c')
+else:
+ print('\nПроизведение чисел a и b не равно c')
+if a*c+b==0:
+ print('Число c является решением линейного уравнения a*x+b=0')
+else:
+ print('Число c не является решением линейного уравнения a*x+b=0')
+print('\nЧтобы завершить программу, нажмите Enter')
+ENTER=input('')
+
+import random
def phrase():
    """Return a random token from ``plus1.txt`` and one from ``plus2.txt``.

    Both files are read as UTF-8, split on whitespace, and closed as soon as
    they have been read. The two chosen tokens are joined by a single space.
    """
    chosen = []
    for name in ("plus1.txt", "plus2.txt"):
        with open(name, "r", encoding="utf-8") as source:
            chosen.append(random.choice(source.read().split()))
    return ' '.join(chosen)
def adjective():
    """Return a random whitespace-separated token from ``adj.txt`` (UTF-8).

    The file is closed before the token is returned.
    """
    with open("adj.txt", "r", encoding="utf-8") as source:
        adj = source.read().split()
    return random.choice(adj)
def verb():
    """Return a random whitespace-separated token from ``verb.txt`` (UTF-8).

    The file is closed before the token is returned.
    """
    with open("verb.txt", "r", encoding="utf-8") as source:
        v = source.read().split()
    return random.choice(v)
def noun(num):
    """Return a random token from the word file selected by *num*.

    Args:
        num: ``'pl'`` selects ``pl.txt``, ``'ind'`` selects ``ind.txt``;
            any other value selects ``sg.txt``.

    Returns:
        str: one whitespace-separated token of the selected UTF-8 file.

    Only the selected file is opened, and it is closed after reading.
    """
    sources = {'pl': "pl.txt", 'ind': "ind.txt"}
    path = sources.get(num, "sg.txt")
    with open(path, "r", encoding="utf-8") as source:
        candidates = source.read().split()
    return random.choice(candidates)
+def punctuation():
+ puncts = [".", "?", "!", "...",";"]
+ return random.choice(puncts)
+def verse1():
+ return phrase() + ' ' + noun("sg") + ' ' + noun("pl") + punctuation()
+def verse2():
+ return verb() + ', ' + verb() + ' ' + noun("ind") + punctuation()
+def verse3():
+ return noun("sg") + ' ' + adjective() + ' ' + noun("pl") + punctuation()
+def doit():
+ verse = random.choice([1,2,3])
+ if verse == 1:
+ return verse1()
+ elif verse == 2:
+ return verse2()
+ else:
+ return verse3()
+for n in range(4):
+ print(doit())
+import os
+import re
def texts(name):
    """Return every ``.+`` match found in the file *name*.

    With this pattern each match is one maximal run of characters that
    contains no newline.
    """
    with open(name, 'r') as f:
        return re.findall('.+', f.read())


def results(s, fname):
    """Write the string *s* to the file *fname* using UTF-8."""
    with open(fname, "w", encoding="utf-8") as f:
        f.write(s)


# The helper was originally defined under this misspelled name; keep it
# available for any caller that still uses it.
resutls = results


def words():
    """Write a per-file match count for all ``.xhtml`` files below ``.``.

    For every file whose name ends in ``.xhtml`` one line
    ``<file name>\\t<number of matches from texts()>`` is produced, and the
    collected lines are written to ``result1.txt``.
    """
    rows = []
    for roots, dirs, files in os.walk('.'):
        for file in files:
            if file.endswith('.xhtml'):
                count = len(texts(os.path.join(roots, file)))
                rows.append(file + "\t" + str(count) + "\n")
    results("".join(rows), "result1.txt")


if __name__ == '__main__':
    words()
+import os
def main():
    """Count directories below ``.`` whose names contain only Cyrillic letters.

    The walk is recursive. The allowed alphabet is the 33 Russian letters
    in both cases; lower-case "ё" is included, which the original literal
    omitted.

    Returns:
        int: number of directories whose name uses only those letters.
    """
    lower = "абвгдеёжзийклмнопрстуфхцчшщъыьэюя"
    alphabet = set(lower + lower.upper())
    num = 0
    for root, dirs, files in os.walk('.'):
        for d in dirs:
            if all(char in alphabet for char in d):
                num += 1
    return num
+if __name__ == '__main__':
+ print(main())
+import random
+def words():
+ f = open("1.csv","r",encoding="utf-8")
+ a = f.read().split(',')
+ m = []
+ for n in a:
+ b = n.rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$
+ m.append(b)
+ return m
+def d():
+ m = words()
+ d = {}
+ for i in m:
+ a = i.split()
+ d[a[0]] = a[1]
+ return d
+def rand():
+ m = words()
+ di = d()
+ mas = []
+ for n in di.keys():
+ mas.append(n)
+ v = random.choice(mas)
+ return v
+def attempt():
+ di = d()
+ v = rand()
+ j = 0
+ for i in di[v]:
+ j += 1
+ print(v,'.'*j)
+ s = input()
+ if s == di[v]:
+ result = "you win"
+ else:
+ result = "you lose"
+ return result
+print(attempt())
+
+s=input("введите слово: ")
+i=0
+for letter in s:
+ if (i+1)%2!=0 :
+ if s[i]=='о' or s[i]=='п' or s[i]=='е':
+ print(s[i])
+ i=i+1
+print('\nЧтобы завершить программу, нажмите Enter')
+ENTER=input('')
+import re
+def text():
+ a=[]
+ f = open("Санкт-Петербург.html","r",encoding="utf-8")
+ for line in f:
+ a.append(line)
+ return a
+def main():
+ a=text()
+ s=''
+ p1 = int; p2 = int
+ regex = '"[A-Z][A-Z][A-Z](\+|-)?[0-9][0-9]?:?[0-90-9]?"'
+ for line in a:
+ b=line.split()
+ for i in b:
+ res = re.search(regex,i)
+ if res != None:
+ p1 = i.find('>')
+ p2 = i.find('<')
+ s=i[p1+1:p2]
+ return s
+def record():
+ s=main()
+ f = open("result.txt","w")
+ f.write("Часовой пояс - "+s)
+ f.close()
+record()
+def names():
+ import os
+ m = os.listdir('.')
+ return m
+def main():
+ m = names()
+ newm = []
+ num = 0
+ for i in m:
+ k = 0
+ for j in i:
+ if j in '1234567890':
+ k += 1
+ if k == 0:
+ num += 1
+ if '.' in i:
+ i = i[:i.index('.')]
+ if i not in newm:
+ newm.append(i)
+ print('num = {}'.format(num))
+ print(newm)
+if __name__ == '__main__':
+ main()
+
+
+
+def text():
+ f = open("ness.txt","r",encoding="utf-8")
+ a = f.read().split()
+ m = []
+ for n in a:
+ b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$
+ m.append(b)
+ return m
def ness(m):
    """Return the distinct words of *m* that end in ``ness``.

    Args:
        m: iterable of word strings.

    Returns:
        list of the matching words without duplicates, in order of first
        appearance. Words are compared as whole words, so a word that is
        merely a substring of an earlier one (``illness`` after
        ``stillness``) is still kept.
    """
    seen = set()
    mas = []
    for word in m:
        if word.endswith('ness') and word not in seen:
            seen.add(word)
            mas.append(word)
    return mas
+def numb():
+ m = text()
+ mas = ness(m)
+ return len(mas)
def main():
    """Return the most frequent ``-ness`` word(s) among the tokens of ``text()``.

    Returns:
        str: every word sharing the highest frequency, each preceded by a
        single space, in order of first appearance; an empty string when no
        token ends in ``ness``.

    Occurrences are counted per whole token, so a word is not counted again
    inside a longer word that contains it.
    """
    counts = {}
    for word in text():
        if word[-4:] == 'ness':
            counts[word] = counts.get(word, 0) + 1
    if not counts:
        return ""
    maxi = max(counts.values())
    return "".join(" " + word for word, n in counts.items() if n == maxi)
+print("Количество разных слов на -ness =",numb(),"\nСамое(ые) частотное(ые) -",main())
+import re
+def lines():
+ f = open('vim4.txt','r',encoding='utf-8')
+ a = f.read()
+ c = re.split(r'[.?!]',a)
+ lines = [' '.join([word.strip('.,<>/?""-=_+''""[]{}()*&^%$
+ return lines
+def main():
+ sents = lines()
+ results = []
+ for line in sents:
+ k = ''
+ k = ['+' for w in line.split()]
+ if len(k) > 10:
+ for w in line.split():
+ if w.istitle() == True:
+ results.append(w)
+ return results
+if __name__ == '__main__':
+ print(main())
+import re
+def text():
+ f = open("portrait.txt","r",encoding="utf-8")
+ a = f.read().split()
+ m = []
+ for n in a:
+ b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$
+ m.append(b)
+ return m
+def main():
+ m = text()
+ regex = 'на(й|ш(е|ё)?)(т|д|л)(ш|енн?)?(а?я?|(и|о|ы|(е|ё)|ую?)?(т|шь)?(ся)?(м(у|и)?|го|е|й|х)?)?'
+ s = ''
+ for i in m:
+ res = re.search(regex,i)
+ if res != None:
+ k = 0
+ for j in i:
+ if j not in regex:
+ k += 1
+ if k == 0:
+ if i not in s:
+ s = s + i + ' '
+ return s
+print(main())
+import re
+def opp():
+ k = 0
+ f = open("it.xml","r",encoding="utf-8")
+ for line in f:
+ k += 1
+ f.close()
+ return k
+def record1():
+ f = open('result1.txt','w',encoding='utf-8')
+ f.write(str(opp()))
+ f.close()
+record1()
+def dic():
+ d = {}
+ regex1 = 'lemma="'
+ regex2 = 'type="[a-zþ0-9]+"'
+ f = open("it.xml","r",encoding="utf-8")
+ for line in f:
+ if re.search(regex1,line) != None:
+ res = re.search(regex2,line)
+ if res != None:
+ p1 = line.rfind('"')
+ p2 = line.find('type=')
+ s = line[p2+6:p1]
+ if s in d.keys():
+ d[s] += 1
+ else:
+ d[s] = 1
+ return d
+def record2():
+ d = dic()
+ f = open('result1.txt','a',encoding='utf-8')
+ for i in d.keys():
+ f.write('\n'+i)
+ f.close()
+record2()
+def plur():
+ d = {}
+ regex1 = 'lemma="'
+ regex2 = 'type="[a-zþ0-9]+"'
+ f = open("it.xml","r",encoding="utf-8")
+ for line in f:
+ if re.search(regex1,line) != None:
+ res = re.search(regex2,line)
+ if res != None:
+ p1 = line.rfind('"')
+ p2 = line.find('type=')
+ s = line[p2+6:p1]
+ if s[0] == 'l' and s[2] == 'f':
+ if s in d.keys():
+ d[s] += 1
+ else:
+ d[s] = 1
+ return d
+def record3():
+ d = plur()
+ f = open('result2','w',encoding='utf-8')
+ for i in d.keys():
+ f.write(i+' - '+str(d[i])+'\n')
+ f.close()
+record3()
+f=open("new1.txt","r",encoding = "utf-8")
+mx=mn=len(f.readline())
+for line in f:
+ if line != "\n":
+ if len(line) > mx:
+ mx = len(line)
+ if len(line) < mn:
+ mn = len(line)
+print(mx/mn)
+f.close()
+
+f=open("text1.txt","r",encoding = "utf-8")
+for line in f:
+ sym=line.split(" ")
+ if sym[2]=="союз":
+ print(line)
+f.close()
+
# Load the dictionary once. Each line of text1.txt is split on single spaces;
# the first field is the key and the remaining fields are kept as the entry.
# Reading everything up front lets every entered word be looked up against
# the whole file rather than only its first unread line.
entries = {}
with open("text1.txt", "r", encoding="utf-8") as f:
    for line in f:
        sym = line.rstrip("\n").split(" ")
        entries.setdefault(sym[0], []).append(sym[1:])

# Collect words until an empty line is entered.
s = input("Введите слово: ")
m = []
while s != '':
    m.append(s)
    s = input("Введите слово: ")

for i in m:
    if i in entries:
        # A word may occur on several lines; print each entry.
        for fields in entries[i]:
            print(i, fields)
    else:
        print(i + " - в словаре нет такого слова")
+
+f=open("text1.txt","r",encoding = "utf-8")
+s=0
+for line in f:
+ sym=line.split(" ")
+ if sym[4]=="ед" and sym[5]=="жен":
+ print(sym[0]+",")
+ s=s+float(sym[-1])
+print(s)
+f.close()
+
+m=[]
+s=input('введите слово: ')
+while s!='':
+ m.append(s)
+ s=input('введите слово: ')
+for word in m:
+ if len(word)>5:
+ print(word)
+print('Чтобы завершить программу, нажмите ENTER')
+ENTER=input('')
+word=input('введите слово: ')
+newword=''
+for letter in word:
+ newword=newword+letter
+ print(newword)
+import re
+import os
+def countsent(file):
+ sent = 0
+ s = open (file,'r')
+ lines = s.readlines()
+ for line in lines:
+ if re.search('',line):
+ sent = sent + 1
+ return sent
def file_countsent():
    """Write ``countsent()`` results for every file below ``news``.

    For each file found by walking ``news`` one line
    ``<file name>\\t<countsent(path)>`` is written to ``countsent.txt``
    (UTF-8). The output file is closed when the walk finishes or fails.
    """
    with open('countsent.txt', 'w', encoding='utf-8') as cw:
        for root, dirs, files in os.walk('news'):
            for f in files:
                path = os.path.join(root, f)
                cw.write(f + '\t' + str(countsent(path)) + '\n')
+def text_data(txt1):
+ topic = re.search(r'', txt1).group(1)
+ author = re.search(r'', txt1).group(1)
+ data = [author, topic]
+ return data
+def csv(data, name):
+ with open(name, 'a', encoding='cp1251') as f:
+ f.write(data[2]+'\t'+data[0]+'\t'+data[1]+'\n')
+def supertable():
+ data1 = []
+ for root, dirs, files in os.walk('news'):
+ for f in files:
+ with open(os.path.join(root, f), 'r', encoding='cp1251') as m:
+ txt = m.read()
+ data = text_data(txt)
+ data.append(f)
+ data1.append(data)
+ for data in data1:
+ csv(data, 'supertable.csv')
+file_countsent()
+supertable()
+import re
+def openfile():
+ file1 = input('Введите путь к файлу: ')
+ with open(file1, "r", encoding="utf-8") as f:
+ arr = []
+ lines = f.readlines()
+ for line in lines:
+ if line.strip() == '':
+ break
+ else: arr.append(line)
+ print('Число строк заголовка', len(arr))
def dictionary():
    """Count ``type="...">`` values in one file and write them to another.

    Both paths are asked for interactively. The input file is read as UTF-8
    and every match of ``type="\\w+">`` contributes its value to a frequency
    dictionary. The output file is opened for writing and receives one
    ``value,count`` line per distinct value.
    """
    file2 = input('Введите путь к файлу: ')
    with open(file2, "r", encoding="utf-8") as f:
        text = f.read()
    dictn = {}
    for i in re.findall(r'type="\w+">', text):
        # Drop the leading 'type="' and the trailing '">'.
        i = i[6::].strip('">')
        dictn[i] = dictn.get(i, 0) + 1
    file3 = input('Введите путь к файлу, куда будет записана информация из словаря: ')
    with open(file3, "w", encoding="utf-8") as f:
        for key in dictn:
            f.write(str(key) + ',' + str(dictn[key]) + '\n')
+openfile()
+dictionary()
+with open('ugadaika.csv', 'r', encoding = 'utf-8') as f:
+ words = []
+ a = f.read()
+ words = a.split(',')
+ dic = {}
+ for i, word in enumerate(words):
+ if i%2 == 0:
+ dic[word] = words[i+1]
+ print('Я хочу сыграть с тобой в одну игру... Какое слово я загадал? Количество точек равно количеству букв в слове.')
+ for key in dic:
+ print(dic[key])
+ b = input()
+ if b == key:
+ print('Молодчинка!!!')
+ else:
+ print ('Ты не очень умный, я загадал не это.')
+mylist = []
+with open('proga.txt', 'r', encoding='utf-8') as f:
+ for line in f.readlines():
+ x = len(line)
+ mylist.append(x)
+mini = mylist[0]
+maxi = mylist[0]
+for i in mylist:
+ if i <= mini:
+ mini = i
+ if i > maxi:
+ maxi = i
+print(maxi/mini)
+
+
+import random
def _random_word(path):
    """Return one random whitespace-separated token of the UTF-8 file *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        tokens = f.read().split()
    return random.choice(tokens)


def adj():
    """Return a random token from ``adj.txt``."""
    return _random_word('adj.txt')


def Petya():
    """Return a random token from ``nouns_like_Petya.txt``."""
    return _random_word('nouns_like_Petya.txt')


def kustik():
    """Return a random token from ``nouns_like_kustik.txt``."""
    return _random_word('nouns_like_kustik.txt')


def prep():
    """Return a random token from ``prep.txt``."""
    return _random_word('prep.txt')


def adjfem():
    """Return a random token from ``adjfem.txt``."""
    return _random_word('adjfem.txt')


def nounfem():
    """Return a random token from ``nounfem.txt``."""
    return _random_word('nounfem.txt')


def verb():
    """Return a random token from ``verbpf.txt``."""
    return _random_word('verbpf.txt')


def punct():
    """Return a random token from ``punct.txt``."""
    return _random_word('punct.txt')
+def verse1():
+ return adj() + ' ' + Petya() + ' ' + verb() + ' ' + kustik() + punct()
+def verse2():
+ return prep() + ' ' + adjfem() + ' ' + nounfem() + punct()
+def verse3():
+ return adj() + ' ' + kustik() + ' ' + verb() + ' ' + Petya() + punct()
+def verse4():
+ return Petya() + ' ' + verb() + ' ' + nounfem() + punct()
+def make_verse():
+ verse = random.choice([1,2,3,4])
+ if verse == 1:
+ return verse1()
+ elif verse == 2:
+ return verse2()
+ elif verse == 3:
+ return verse3()
+ else:
+ return verse4()
+for n in range(4):
+ print(make_verse())
+import os
+import re
+nonum = []
+num = []
+for f in os.listdir('.'):
+ if re.search('[1234567890]', f):
+ num.append(f)
+ else:
+ nonum.append(f)
+print('Файлов, не содержащих цифр в названии: ', len(nonum))
+print('Введите число a и нажмите Enter')
+a=int(input())
+print('Введите число b и нажмите Enter')
+b=int(input())
+print('Введите число c и нажмите Enter')
+c=int(input())
+if a*b==c:
+ print(c ,'является произведением', a,' и ', b)
+else:
+ print(c ,' не является произведением', a,' и ', b)
+if c*a==(-1)*b:
+ print(c,'является решением линейного уравнения', a,'x +',b,'= 0')
+else:
+ print(c,'не является решением линейного уравнения', a,'x +',b,'= 0')
+b=1
+int (b)
+a=(input())
+for i in a:
+ if (b%2)&((i=='о')or(i=='п')or(i=='е')):
+ print (i)
+ b+=1
+import re
+def vikings():
+ wikifile = input('Время альтернативной истории! Введите имя файла со статьей про викингов: ')
+ with open(wikifile, 'r', encoding = 'utf-8') as f:
+ wikiarticle = f.read()
+ return wikiarticle
def change1(wikiarticle):
    """Return *wikiarticle* with every ``викинг`` replaced by ``бурундук``."""
    return wikiarticle.replace('викинг', 'бурундук')
def change2(myarticle1):
    """Return *myarticle1* with every ``Викинг`` replaced by ``Бурундук``."""
    return myarticle1.replace('Викинг', 'Бурундук')
+def chimpunks(myarticle2):
+ newfile = input('Введите имя файла, куда следует поместить измененную статью: ')
+ with open(newfile, 'w', encoding = 'utf-8') as f:
+ f.write(myarticle2)
+def go():
+ chimpunks(change2(change1(vikings())))
+go()
+import re
+def findforms():
+ find = r"\bна(ш(ёл(ся)?|е(л(ся)?|дш(е(го(ся)?|м(ся|у(ся)?)?|е(ся)?|й(ся)?|ю(ся)?)|ую(ся)?|ая(ся)?|и(й(ся)?|е(ся)?|сь|м(и(ся)?)?|х(ся)?)?))|л(а(сь)?|о(сь)?|и(сь)?))|й(ти(сь)?|д(я(сь)?|у(сь|т(ся)?)?|ё(м(ся)?|шь(ся)?|т(ся|е(сь)?)?|нн(ую|ая|ы(х|е|й|ми?)|о(й|го|о|ю|му?)))|е(шь(ся)?|т(ся|е(сь)?)?|м(ся)?|н(а|о|ы|н((ую|ая|ы(х|е|й|ми?)|о(й|го|о|ю|му?))))?)|и(сь|те(сь)?)?)))\b"
+ arr = []
+ with open("find.txt", "r", encoding="utf-8") as f:
+ words = f.read()
+ for word in words.split():
+ p = re.search(find, word)
+ if p != None:
+ if word not in arr:
+ arr.append(word)
+ for item in arr:
+ print(item)
+findforms()
+a = []
+s = str(input("Введите слово "))
+while s != (""):
+ if len(s) > 5:
+ a.append(s)
+ s = str(input("Введите слово "))
+print('\n'.join(a))
+
+
+
+
+
+
+def counting():
+ with open('isl.txt', 'r', encoding='utf-8') as islen:
+ islen.read()
+ str = islen.readline().replace('\n', '')
+ islenlines = []
+ islencount = 0
+ for line in islen:
+ islenlines.append
+ islencount = 0
+ if '' in line:
+ break
+ print(islencount)
+
+
+
+
+counting()
+def dictionary():
+ lemmas = []
+ alsolemmas = []
+ str = islen.readline
+ for i in range(str):
+ if '(.*?)'
+ links = re.findall(reg, content)
+ return links
+text = open_html('butterflies.html')
+links = find_links(text)
+for link in links[:20]:
+ print(link[1], '-->', link[0])
+d = {"Россия":'Москва',
+ "Польша":'Варшава',
+ "США":'Вашингтон',
+ "Болгария":'София',
+ "Армения":'Ереван',
+ "Бразилия":'Бразилиа',
+ "Испания":'Москва'}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
def delete_doubles(d):
    """Return a copy of *d* without entries that repeat an earlier value.

    Args:
        d: dictionary whose values are hashable.

    Returns:
        dict: for every distinct value, the first key (in iteration order)
        that maps to it, together with that value. *d* is not modified.
    """
    seen = set()
    new = {}
    for key, value in d.items():
        if value in seen:
            continue
        seen.add(value)
        new[key] = value
    return new
+delete_doubles(d)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
def open_html(fname):
    """Return the full contents of the UTF-8 text file *fname*."""
    with open(fname, mode='r', encoding='utf-8') as source:
        return source.read()
def tags(text):
    """Strip tags from *text*, collapse whitespace and swap one name.

    The substitutions are applied in order: ``<...>`` spans are removed,
    each run of whitespace becomes a single space, and ``Илон Маск`` is
    replaced by ``Маленький котёнок``.
    """
    substitutions = (
        (r'<.*?>', r''),
        (r'\s+', r' '),
        (r'Илон Маск', r'Маленький котёнок'),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
+print(tags(open_html('musk.html')))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
+rain = r"\b\дожд([ьюи]|е|ей|я(м|ми?)|ях|ём?)?\b"
+s = input('Введите какое-нибудь слово: ')
+m = re.search(rain, s)
+if m != None:
+ print('Это слово является формой слова "дождь"!')
+else:
+ print('Нетушки!')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import codecs
def open_file(file_name):
    """Read *file_name* and return its words, cleaned and lower-cased.

    The file is decoded as ``utf-8-sig``. Every whitespace-separated token
    has the characters ``.,!?:;()'"`` and digits stripped from both ends and
    is lower-cased; tokens that become empty are dropped.

    Returns:
        list of str in reading order.
    """
    words = []
    with codecs.open(file_name, 'r', 'utf-8-sig') as f:
        for line in f:
            for token in line.split():
                # Keep the stripped result: strings are immutable, so
                # rebinding a loop variable would leave the list untouched.
                word = token.strip(u'.,!?:;()\'\"1234567890').lower()
                if word:
                    words.append(word)
    return words
def bigramms(words):
    """Print and return the distinct bigrams of *words*.

    Bigrams come from ``create_list(words)``. Each distinct bigram appears
    once, in order of first occurrence, with ``'\\r\\n'`` appended.
    """
    counts = {}
    for pair in create_list(words):
        counts[pair] = counts.get(pair, 0) + 1
    answer = [pair + '\r\n' for pair in counts]
    print(answer)
    return answer
def create_list(words):
    """Return each pair of neighbouring items of *words* concatenated.

    The two items are joined directly, with no separator. A list with fewer
    than two items yields an empty list.
    """
    return [first + second for first, second in zip(words, words[1:])]
+words = open_file('text.txt')
+bigramms(words)
+import re
+with open('news.txt', 'r', encoding = 'utf-8') as f:
+ text = f.read()
+punct = '[.,?!:;"\'—@–...«»
+tabs = '[\t\n]'
+def preprocessing(text):
+ text = text.strip().lower()
+ text = re.sub(punct, '', text)
+ text = re.sub(tabs, ' ', text)
+ words = text.split()
+ return words
+words = preprocessing(text)
+
def make_freq(arr):
    """Return a dict mapping each element of *arr* to its occurrence count."""
    freq = {}
    for item in arr:
        freq[item] = freq.get(item, 0) + 1
    return freq
+word_freq = make_freq(words)
def make_bigrams(arr):
    """Return the space-joined bigrams of *arr*.

    Args:
        arr: sequence of strings.

    Returns:
        list with ``len(arr) - 1`` strings ``"<item> <next item>"``; empty
        when *arr* has fewer than two items.

    The loop is bounded by the argument itself and stops one item before
    the end, so the last item is never paired with a missing neighbour.
    """
    return [arr[i] + ' ' + arr[i + 1] for i in range(len(arr) - 1)]
+bigrams = make_bigrams(words)
+
+bigrams_freq = make_freq(bigrams)
+from math import log
def count_pmi(x, y):
    """Return the pointwise mutual information of the bigram ``x y``.

    Probabilities are relative frequencies taken from the module-level
    ``word_freq``/``words`` and ``bigrams_freq``/``bigrams``.

    Returns:
        ``log(p_xy / (p_x * p_y))``, or 0 when any of the three
        probabilities is zero (unknown word or unseen bigram), since the
        logarithm is undefined there.
    """
    bigr = x + ' ' + y
    # Membership is tested before dividing so an unknown key never reaches
    # the division.
    p_x = word_freq[x] / len(words) if x in word_freq else 0
    p_y = word_freq[y] / len(words) if y in word_freq else 0
    p_xy = bigrams_freq[bigr] / len(bigrams) if bigr in bigrams_freq else 0
    if p_x == 0 or p_y == 0 or p_xy == 0:
        return 0
    return log(p_xy / (p_x * p_y))
+def calculate_pmi():
+ pmis ={}
+ for bigr in bigrams:
+ x, y = bigr.split()
+ pmi = count_pmi(x, y)
+ pmis[bigr] = pmi
+ return pmis
+pmi = calculate_pmi()
+i = 0
+for el in sorted(pmi, key = lambda m: -pmi[m]):
+ if i > 100:
+ break
+ print(el, pmi[el])
+ i += 1
+import os
+corpus_anek = ''
+corpus_izvest = ''
+corpus_teh = ''
+for root, dirs, files in os. walk('texts'):
+ if 'anekdots' in root:
+ for f in files:
+ with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1:
+ text = f1.read()
+ corpus_anek += text
+ if 'teh_mol' in root:
+ for f in files:
+ with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1:
+ text = f1.read()
+ corpus_teh += text
+ if 'izvest' in root:
+ for f in files:
+ with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1:
+ text = f1.read()
+ corpus_izvest += text
+print(corpus_teh[:100])
+words_anek = preprocessing(corpus_anek)
+words_teh = preprocessing(corpus_teh)
+words_izvest = preprocessing(corpus_izvest)
+words_all = words_anek + words_teh + words_izvest
+freq_anek = make_freq(words_anek)
+freq_teh = make_freq(words_teh)
+freq_izvest = make_freq(words_izvest)
+freq_all = make_freq(words_all)
+def count_pmi_cats(word, category):
+ p_word = freq_all[word]/len(words_all)
+ p_cat = 1/3
+ if category == 'anek':
+ d = freq_anek
+ w = len(words_anek)
+ elif category == 'izvest':
+ d = freq_izvest
+ w = len(words_izvest)
+ elif category == 'teh':
+ d = freq_teh
+ w = len(words_teh)
+ p_word_cat = d[word]/w
+ pmi = log(p_word_cat/(p_word*p_cat))
+ return pmi
+for w in words:
+ if i > 100:
+ break
+ try:
+ pmi_anek = count_pmi_cats(w, 'anek')
+ pmi_izvest = count_pmi_cats(w, 'izvest')
+ pmi_teh = count_pmi_cats(w, 'teh')
+ max_pmi = max(pmi_anek, pmi_izvest, pmi_teh)
+ if max_pmi == pmi_anek:
+ print(w, 'anek')
+ elif max_pmi == pmi_izvest:
+ print(w, 'izvest')
+ elif max_pmi == pmi_teh:
+ print(w, 'teh')
+ except KeyError:
+ pass
+ i += 1
+
+
+
+
+
+
+
+
+import os
+import re
+from math import log
+punct = '[.,!«»?&@"$\[\]\(\):;%
+tabs = '[\t\n]'
+def preprocessing(text):
+ text_wo_punct = re.sub(punct, '', text.lower())
+ text_wo_punct = re.sub(tabs, ' ',text_wo_punct)
+ words = text_wo_punct.strip().split()
+ return words
def count_tf(word, text):
    """Return ``text.count(word)`` divided by ``len(text)``."""
    occurrences = text.count(word)
    total = len(text)
    return occurrences / total
def count_df(word, texts):
    """Return how many items of *texts* contain *word* (``word in text``)."""
    matches = 0
    for text in texts:
        if word in text:
            matches += 1
    return matches
def count_idf(word, texts):
    """Return ``len(texts)`` divided by ``count_df(word, texts)``.

    Returns 0 when no text contains *word*, where the division is undefined.
    """
    df = count_df(word, texts)
    if df == 0:
        return 0
    return len(texts) / df
def count_tfidf(word, text, texts):
    """Return the tf-idf weight of *word* in *text* relative to *texts*.

    The weight is ``tf * log10(idf)`` with ``tf`` from ``count_tf`` and
    ``idf`` from ``count_idf``. When ``count_idf`` reports 0 (no text
    contains the word) the weight is 0, because the logarithm is undefined.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    if idf <= 0:
        return 0
    return tf * log(idf, 10)


def keywords(text, texts):
    """Return the highest-weighted words of *text*.

    Args:
        text: list of words of one document.
        texts: list of all documents (each a list of words).

    Returns:
        dict mapping up to six words to their tf-idf weight, ordered from
        the highest weight down. Each distinct word is weighted once.
    """
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    # A larger weight means a more characteristic word, so rank descending.
    ranked = sorted(dic_tfidf, key=lambda x: dic_tfidf[x], reverse=True)
    return {el: dic_tfidf[el] for el in ranked[:6]}
+def main():
+ texts = {}
+ for root, dirs, files in os.walk('wikipedia'):
+ for f in files:
+ with open(os.path.join(root, f),'r', encoding = 'utf-8') as t:
+ content = t.read()
+ text = preprocessing(content)
+ texts[f] = text
+ raw_texts = list(texts.values())
+ for t in texts:
+ print('\nИзвлекаем ключевые слова для текста {}'.format(t))
+ kwords = keywords(texts[t], raw_texts)
+ for key in kwords:
+ print(key, kwords[key])
+if __name__ == '__main__':
+ main()
print("Здравствуйте!")
a = int(input("Введите число a: "))
b = int(input("Введите число b: "))
c = int(input("Введите число c: "))
if a + b == c:
    print("Числа a и b в сумме дают число c")
else:
    print("Числа a и b в сумме НЕ дают число c")
# Substitute c into a*x + b instead of computing -b / a: this stays in exact
# integer arithmetic and does not divide by zero when a is 0.
if a * c + b == 0:
    print("Число c является решением линейного уравнения ax + b = 0")
else:
    print("Число c НЕ является решением линейного уравнения ax + b = 0")
+import re
+import os
def folders():
    """Count directories in ``.`` whose name contains a digit.

    Only the direct entries of the current directory are examined.

    Returns:
        str: the count, as a string.
    """
    digit = re.compile('[0-9]')
    matching = [
        entry for entry in os.listdir('.')
        if os.path.isdir(entry) and digit.search(entry)
    ]
    return str(len(matching))
+def names():
+ print('Все файлы и(или) папки в текущей папке: ')
+ arr = []
+ res = '\..+'
+ for i in os.listdir('.'):
+ name = i
+ if os.path.isdir(i):
+ name = re.sub(res, '', i)
+ if name not in arr:
+ arr.append(name)
+ for each in arr:
+ if each:
+ print(each + '\n')
+ else:
+ print('None')
+print('Количество папок с цифрами в названии в текущей папке: ' + folders())
+names()
+def open_read():
+ num = 0
+ with open('F.xml', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ num += 1
+ return num
+def write_doc(num):
+ numlines = str(num)
+ with open ('Number.txt', 'w', encoding = 'utf-8') as new_doc:
+ new_doc.write(numlines)
+ print('Количество строк: ' + numlines + '\n' + 'Создан документ Number.txt')
+def main():
+ write_doc(open_read())
+main()
+import os
+a = {}
+def dict_new():
+ for root, dirs, files in os.walk('.\\news'):
+ for file in files:
+ with open (os.path.join(root, file), 'r', encoding = 'cp1251') as page:
+ raw_text = page.read()
+ a[file] = raw_text.count('(.*?)', text, flags = re.DOTALL)
+ cap = cap.group(3)
+ return cap
+def write_doc(cap):
+ with open ('Capital.txt', 'w', encoding = 'utf-8') as new_doc:
+ new_doc.write(cap)
+ print('Столица данной страны: ' + cap + '\n' + 'Создан документ Capital.txt')
+def main():
+ write_doc(capital(open_read()))
+main()
+word = input()
+text = []
+while word:
+ text.append(word)
+ word = input()
+for i in range(len(text)):
+ new = text[i]
+ new = new[::-1]
+ new = list(new)
+ for t in range (len(new)):
+ if (t + 1) % 3 == 0:
+ new[t] = ''
+ wrd = ''.join(new)
+ print(wrd)
+with open('Master and Margarita.txt','r', encoding = 'utf-8') as MM:
+ tablewords = []
+ space = 0
+ lines = MM.readlines()
+ print(' ', *lines)
+ for i in range(len(lines)):
+ for k in range(len(lines[i])):
+ if lines[i][k] == ' ':
+ space += 1
+ tablewords.append(space + 1)
+ space = 0
+ number = 0
+ for l in range(len(tablewords)):
+ number += tablewords[l]
+ averword = number/len(lines)
+ print('\n','Среднее количество слов в строке =',averword)
+word = input('Введите слово: ')
+print(word)
+for i in range(len(word)):
+ print(word[:-(1+i)])
+import random
+with open('allwords.txt', 'r', encoding = 'utf-8') as aw:
+ lines = aw.readlines()
+
+ def noun_m1():
+ noun_m1 = []
+ noun_m1 = lines[1].split(' ')
+ return random.choice(noun_m1)
+ def noun_f1():
+ noun_f1 = []
+ noun_f1 = lines[2].split(' ')
+ return random.choice(noun_f1)
+ def noun_m2():
+ noun_m2 = []
+ noun_m2 = lines[3].split(' ')
+ return random.choice(noun_m2)
+ def noun_f2():
+ noun_f2 = []
+ noun_f2 = lines[4].split(' ')
+ return random.choice(noun_f2)
+ def noun_mid2():
+ noun_mid2 = []
+ noun_mid2 = lines[5].split(' ')
+ return random.choice(noun_mid2)
+ def noun_m3():
+ noun_m3 = []
+ noun_m3 = lines[6].split(' ')
+ return random.choice(noun_m3)
+ def noun_f3():
+ noun_f3 = []
+ noun_f3 = lines[7].split(' ')
+ return random.choice(noun_f3)
+ def noun_mid3():
+ noun_mid3 = []
+ noun_mid3 = lines[8].split(' ')
+ return random.choice(noun_mid3)
+ def noun_m4():
+ noun_m4 = []
+ noun_m4 = lines[9].split(' ')
+ return random.choice(noun_m4)
+ def noun_f4():
+ noun_f4 = []
+ noun_f4 = lines[10].split(' ')
+ return random.choice(noun_f4)
+ def noun_mid4():
+ noun_mid4 = []
+ noun_mid4 = lines[11].split(' ')
+ return random.choice(noun_mid4)
+ def noun_m5():
+ noun_m5 = []
+ noun_m5 = lines[12].split(' ')
+ return random.choice(noun_m5)
+ def noun_f5():
+ noun_f5 = []
+ noun_f5 = lines[13].split(' ')
+ return random.choice(noun_f5)
+ def noun_mid5():
+ noun_mid5 = []
+ noun_mid5 = lines[14].split(' ')
+ return random.choice(noun_mid5)
+ def noun_m6():
+ noun_m6 = []
+ noun_m6 = lines[15].split(' ')
+ return random.choice(noun_m6)
+ def noun_f6():
+ noun_f6 = []
+ noun_f6 = lines[16].split(' ')
+ return random.choice(noun_f6)
+ def noun_mid6():
+ noun_mid6 = []
+ noun_mid6 = lines[17].split(' ')
+ return random.choice(noun_mid6)
+
+ def verb_1():
+ verb_1 = []
+ verb_1 = lines[20].split(' ')
+ return random.choice(verb_1)
+ def verb_2():
+ verb_2 = []
+ verb_2 = lines[21].split(' ')
+ return random.choice(verb_2)
+ def verb_3():
+ verb_3 = []
+ verb_3 = lines[22].split(' ')
+ return random.choice(verb_3)
+ def verb_4():
+ verb_4 = []
+ verb_4 = lines[23].split(' ')
+ return random.choice(verb_4)
+ def verb_5():
+ verb_5 = []
+ verb_5 = lines[24].split(' ')
+ return random.choice(verb_5)
+ def verb_6():
+ verb_6 = []
+ verb_6 = lines[25].split(' ')
+ return random.choice(verb_6)
+
+ def conj_1():
+ conj_1 = []
+ conj_1 = lines[28].split(' ')
+ return random.choice(conj_1)
+ def conj_2():
+ conj_2 = []
+ conj_2 = lines[29].split(' ')
+ return random.choice(conj_2)
+
+ def adj_m1():
+ adj_m1 = []
+ adj_m1 = lines[32].split(' ')
+ return random.choice(adj_m1)
+ def adj_m2():
+ adj_m2 = []
+ adj_m2 = lines[33].split(' ')
+ return random.choice(adj_m2)
+ def adj_f2():
+ adj_f2 = []
+ adj_f2 = lines[34].split(' ')
+ return random.choice(adj_f2)
+ def adj_m3():
+ adj_m3 = []
+ adj_m3 = lines[35].split(' ')
+ return random.choice(adj_m3)
+ def adj_f3():
+ adj_f3 = []
+ adj_f3 = lines[36].split(' ')
+ return random.choice(adj_f3)
+ def adj_mid3():
+ adj_mid3 = []
+ adj_mid3 = lines[37].split(' ')
+ return random.choice(adj_mid3)
+ def adj_m4():
+ adj_m4 = []
+ adj_m4 = lines[38].split(' ')
+ return random.choice(adj_m4)
+ def adj_f4():
+ adj_f4 = []
+ adj_f4 = lines[39].split(' ')
+ return random.choice(adj_f4)
+ def adj_mid4():
+ adj_mid4 = []
+ adj_mid4 = lines[40].split(' ')
+ return random.choice(adj_mid4)
+ def adj_m5():
+ adj_m5 = []
+ adj_m5 = lines[41].split(' ')
+ return random.choice(adj_m5)
+ def adj_f5():
+ adj_f5 = []
+ adj_f5 = lines[42].split(' ')
+ return random.choice(adj_f5)
+ def adj_mid5():
+ adj_mid5 = []
+ adj_mid5 = lines[43].split(' ')
+ return random.choice(adj_mid5)
+ def adj_m6():
+ adj_m6 = []
+ adj_m6 = lines[44].split(' ')
+ return random.choice(adj_m6)
+ def adj_f6():
+ adj_f6 = []
+ adj_f6 = lines[45].split(' ')
+ return random.choice(adj_f6)
+ def adj_mid6():
+ adj_mid6 = []
+ adj_mid6 = lines[46].split(' ')
+ return random.choice(adj_mid6)
+
+ def adv_2():
+ adv_2 = []
+ adv_2 = lines[49].split(' ')
+ return random.choice(adv_2)
+ def adv_3():
+ adv_3 = []
+ adv_3 = lines[50].split(' ')
+ return random.choice(adv_3)
+ def adv_4():
+ adv_4 = []
+ adv_4 = lines[51].split(' ')
+ return random.choice(adv_4)
+ def adv_5():
+ adv_5 = []
+ adv_5 = lines[52].split(' ')
+ return random.choice(adv_5)
+ def adv_6():
+ adv_6 = []
+ adv_6 = lines[53].split(' ')
+ return random.choice(adv_6)
+
+ def numeral_m2():
+ numeral_m2 = []
+ numeral_m2 = lines[56].split(' ')
+ return random.choice(numeral_m2)
+ def numeral_f2():
+ numeral_f2 = []
+ numeral_f2 = lines[57].split(' ')
+ return random.choice(numeral_f2)
+ def numeral_mid2():
+ numeral_mid2 = []
+ numeral_mid2 = lines[58].split(' ')
+ return random.choice(numeral_mid2)
+ def numeral_m3():
+ numeral_m3 = []
+ numeral_m3 = lines[59].split(' ')
+ return random.choice(numeral_m3)
+ def numeral_f3():
+ numeral_f3 = []
+ numeral_f3 = lines[60].split(' ')
+ return random.choice(numeral_f3)
+ def numeral_mid3():
+ numeral_mid3 = []
+ numeral_mid3 = lines[61].split(' ')
+ return random.choice(numeral_mid3)
+ def numeral_m4():
+ numeral_m4 = []
+ numeral_m4 = lines[62].split(' ')
+ return random.choice(numeral_m4)
+ def numeral_f4():
+ numeral_f4 = []
+ numeral_f4 = lines[63].split(' ')
+ return random.choice(numeral_f4)
+ def numeral_mid4():
+ numeral_mid4 = []
+ numeral_mid4 = lines[64].split(' ')
+ return random.choice(numeral_mid4)
+ def numeral_m5():
+ numeral_m5 = []
+ numeral_m5 = lines[65].split(' ')
+ return random.choice(numeral_m5)
+ def numeral_f5():
+ numeral_f5 = []
+ numeral_f5 = lines[66].split(' ')
+ return random.choice(numeral_f5)
+ def numeral_mid2():
+ numeral_mid5 = []
+ numeral_mid5 = lines[67].split(' ')
+ return random.choice(numeral_mid5)
+ def numeral_f6():
+ numeral_f6 = []
+ numeral_f6 = lines[68].split(' ')
+ return random.choice(numeral_f6)
+ def numeral_mid6():
+ numeral_mid6 = []
+ numeral_mid6 = lines[69].split(' ')
+ return random.choice(numeral_mid6)
+ def row_1_5():
+ phrase_of_5_1 =[adj_m1() + ' ' + noun_m4(), adj_m2() + ' ' + noun_m3(), adj_m3() + ' ' + noun_m2(), adj_m4() + ' ' + noun_m1(),
+ numeral_m2() + ' ' + noun_m1() + ' ' + verb_2(), numeral_m2() + ' ' + noun_m2() + ' ' + verb_1(),
+ numeral_m2() + ' ' + noun_m3(), numeral_m3() + ' ' + noun_m1() + ' ' + verb_1(), numeral_m3() + ' ' + noun_m2(),
+ adj_f2() + ' ' + noun_f3(), adj_f3() + ' ' + noun_f2(), adj_f4() + ' ' + noun_f1(),
+ numeral_f2() + ' ' + noun_f1() + ' ' + verb_2(), numeral_f2() + ' ' + noun_f2() + ' ' + verb_2(), numeral_f2() + ' ' + noun_f3(),
+ numeral_f3() + ' ' + noun_f1() + ' ' + verb_1(), numeral_f3() + ' ' + noun_f2(),
+ numeral_mid2() + ' ' + verb_2(), numeral_mid2() + ' ' + noun_mid2() + ' ' + verb_1(),
+ numeral_mid2() + ' ' + noun_mid3(),
+ numeral_mid3() + ' ' + verb_1(), numeral_mid3() + ' ' + noun_mid2(),noun_m5(), noun_f5(), noun_mid5()]
+ return random.choice(phrase_of_5_1)
+ def row_1_7():
+ phrase_of_7_1 =[adv_2() + ' ' + verb_5(), adv_3() + ' ' + verb_4(), adv_4() + ' ' + verb_3(), adv_5() + ' ' + verb_2(), adv_6() + ' ' + verb_1(),
+ adv_2() + ' ' + verb_4() + ' ' + conj_1(), adv_2() + ' ' + verb_3() + ' ' + conj_2(),
+ adv_3() + ' ' + verb_3() + ' ' + conj_1(), adv_3() + ' ' + verb_2() + ' ' + conj_2(),
+ adv_4() + ' ' + verb_2() + ' ' + conj_1(), adv_4() + ' ' + verb_1() + ' ' + conj_2(),
+ adv_5() + ' ' + verb_1() + ' ' + conj_1(), adv_5() + ' ' + conj_2(),
+ adv_6() + ' ' + conj_1()]
+ return random.choice(phrase_of_7_1)
+ def row_2_5():
+ phrase_of_5_2 =[verb_1() + ' ' + noun_m4(), verb_2() + ' ' + noun_m3(), verb_3() + ' ' + noun_m2(), verb_4() + ' ' + noun_m1(),
+ verb_1() + ' ' + noun_f4(), verb_2() + ' ' + noun_f3(), verb_3() + ' ' + noun_f2(), verb_4() + ' ' + noun_f1(),
+ verb_1() + ' ' + noun_mid4(), verb_2() + ' ' + noun_mid3(), verb_3() + ' ' + noun_mid2()]
+ return random.choice(phrase_of_5_2)
+ def row_2_7():
+ phrase_of_7_2 =[noun_m1() + ' ' + verb_6(),noun_m2() + ' ' + verb_5(),noun_m3() + ' ' + verb_4(),noun_m4() + ' ' +verb_3(),
+ noun_m5() + ' ' + verb_2(),
+ noun_m6() + ' ' + verb_1(),
+ noun_f1() + ' ' + verb_6(), noun_f2() + ' ' + verb_5(), noun_f3() + ' ' + verb_4(), noun_f4() + ' ' + verb_3(),
+ noun_f5() + ' ' + verb_2(),
+ noun_f6() + ' ' + verb_1(), noun_mid2() + ' ' + verb_5(), noun_mid3() + ' ' + verb_4(), noun_mid4() + ' ' + verb_3(),
+ noun_mid5() + ' ' + verb_2(),
+ noun_mid6() + ' ' + verb_1()]
+ return random.choice(phrase_of_7_2)
+ def row_3_5():
+ phrase_of_5_3 =[verb_5(), adv_5()]
+ return random.choice(phrase_of_5_3)
+ def haiku():
+ ready = [row_2_5() + '\n' + row_2_7() + '\n' + row_1_5(),
+ row_3_5() + '\n' + row_2_7() + '\n' + row_3_5(),
+ row_1_5() + '\n' + row_1_7() + '\n' + row_3_5()]
+ return random.choice(ready)
+print(haiku())
+word = input ('give a word')
+lenghth = len(word)
+z = 0
+newword ='space'
+while newword != '':
+ newword = ''
+ newword = word[z:lenghth]
+ print(newword)
+ z += 1
+ lenghth -= 1
+
+import re
+def sentences():
+ with open ('text.txt','r',encoding = 'utf-8') as f:
+ text = f.read()
+ m = re.findall('[^.!?]{1,}?[.?!]', text)
+ m= [sent.split() for sent in m]
+ for sentence in m:
+ for i in range(len(sentence)):
+ sentence[i] = sentence[i].strip('!?.,;:"').lower()
+ return m
def output(m):
    """Print, sentence by sentence, every word that occurs more than once.

    Args:
        m: list of sentences, each sentence being a list of already
            normalised word strings.

    For each sentence a header line ``предложение № <n>`` is printed,
    followed by one line per repeated word: the word centred in a column
    as wide as the longest word of the whole text, then its count.

    The original counted occurrences with ``range(0, len(sentence) - 1)``,
    which skipped the last word of every sentence, so a repetition that
    involved the final word was under-counted or missed entirely.
    """
    # default=0 keeps max() from raising ValueError when there are no words.
    maxi = max((len(word) for sentence in m for word in sentence), default=0)
    for sentence_number, sentence in enumerate(m, start=1):
        print('предложение №', sentence_number)
        seen = set()
        for word in sentence:
            # Report each distinct word once, in order of first appearance.
            if word in seen:
                continue
            seen.add(word)
            j = sentence.count(word)
            if j > 1:
                print('{:^{maxi}} {:^2}'.format(word, j, maxi=maxi))
+output(sentences())
+
+import csv
+import random
+def open_file():
+ with open('some.csv', 'r') as f:
+ a =[]
+ reader = csv.reader(f)
+ for line in reader:
+ a.append(line)
+ return a
def dictionary(a):
    """Pair each cell of the first row of *a* with the cell below it.

    Args:
        a: table as a list of rows (lists of strings); only rows 0 and 1
            are read.

    Returns:
        dict mapping ``a[0][i]`` to ``a[1][i]``.

    The original looped over a hard-coded ``range(0, 5)``, which raised
    IndexError for tables with fewer than five columns and ignored any
    columns beyond the fifth.  All available column pairs are used now;
    for a five-column table the result is unchanged.
    """
    return dict(zip(a[0], a[1]))
+def answer(d,a):
+ word = random.choice(list(d.values()))
+ for key in d:
+ if d[key] == word:
+ print('твоя подсказка:',key)
+ while True:
+ ans = input('введи слово')
+ if ans == word:
+ return random.choice(a[2])
+ else:
+ print(random.choice(a[3]))
+print('мы загадали слово для тебя')
+print(answer(dictionary(open_file()),open_file()))
+
+import re
+def open_text():
+ words = []
+ with open('text.txt', 'r', encoding ='utf-8') as f:
+ text = f.read().lower()
+ text = text.split()
+ for item in text:
+ item = item.strip('.,?!-')
+ if item not in words:
+ words.append(item)
+ return words
+def answer(words):
+ for item in words:
+ m = re.match( r'\bси(д(и(шь|те?|м)?|е(л(о|а|и)?|в(ш(и(й|ми?|е|х)?|е(го|му?|е|й|ю)|ая|ую))?|ть)|я(т|щ(и(й|ми?|е|х)|е(го|му?|е|й|ю)|ая|ую))?)|жу)\b', item)
+ if m != None:
+ print(item)
+sit = answer(open_text())
+
+quantity = 0
+percent = 0
+f = open('newy.txt','r',encoding ='utf-8')
+for line in f:
+ quantity += 1
+ a = line.split()
+ if len(a) > 5:
+ percent += 1
+ else:
+ continue
+ a = []
+f.close()
+if percent == 0 or quantity == 0:
+ print(' no lines like this')
+else:
+ print ('the number of lines:', percent / quantity * 100)
+import os
+def walking():
+ d = {root : len(files) for root, dirs, files in os.walk('.')}
+ maxi = max(d.values())
+ for key in d:
+ if d[key] == maxi:
+ print ('there are',maxi,'files in',key)
+walking()
+import re
def read_page(path='ptitsi.html'):
    """Return the whole text of the UTF-8 file *path*.

    This function used to be called ``open``.  That name shadowed the
    builtin, so the ``open(...)`` calls inside it and inside ``write``
    re-entered this zero-argument function and raised TypeError.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def substitute(content):
    """Strip markup from *content* and turn every "птица" form into "рыба".

    Steps, in order:
      1. remove everything between ``<`` and ``>`` (tags may span lines);
      2. delete every run of two or more spaces/newlines;
      3. replace the stem ``птиц``/``Птиц`` with ``рыб``/``Рыб`` while
         keeping whatever case ending the pattern captured.

    Returns the transformed string.
    """
    content = re.sub(r'<.*?>', '', content, flags=re.DOTALL)
    # NOTE(review): runs of whitespace are deleted outright, not collapsed to
    # one space, so text on both sides is glued together - confirm intended.
    content = re.sub(r'(\n| ){2,}', '', content)
    # When no ending is captured, group 1 is unmatched and expands to ''.
    content = re.sub(r'птиц(а(ми?|х)|ы|е(й|ю)?|у)?', r'рыб\1', content)
    content = re.sub(r'Птиц(а(ми?|х)|ы|е(й|ю)?|у)?', r'Рыб\1', content)
    return content


def write(content, path='text.txt'):
    """Write *content* to the UTF-8 file *path*, replacing any old contents."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)


if __name__ == '__main__':
    # The original line had an unbalanced parenthesis and wrapped the call in
    # print(), which could only ever print "None".
    write(substitute(read_page()))
+
+
+
+import random
+def imperative():
+ with open('imperatives.txt', 'r',encoding = 'utf-8') as f:
+ imperatives =[]
+ for line in f:
+ newword = line.strip()
+ imperatives.append(newword)
+ return random.choice(imperatives)
+def noun_acc():
+ with open('nouns_Acc_Sg&Pl.txt', 'r',encoding = 'utf-8') as f:
+ noun_accs =[]
+ for line in f:
+ newword = line.strip()
+ noun_accs.append(newword)
+ return random.choice(noun_accs)
+def ins_phrase():
+ with open('clitics_Ins.txt', 'r',encoding = 'utf-8') as f:
+ clitics = []
+ for line in f:
+ newword = line.strip()
+ clitics.append(newword)
+ with open('nouns_Ins.txt', 'r',encoding = 'utf-8') as g:
+ noun_inss = []
+ for line in g:
+ newword = line.strip()
+ noun_inss.append(newword)
+ return random.choice(clitics) + ' ' + random.choice(noun_inss)
+def noun_pl():
+ with open('nouns_ Nom=Acc_Pl.txt', 'r',encoding = 'utf-8') as f:
+ noun_pls = []
+ for line in f:
+ newword = line.strip()
+ noun_pls.append(newword)
+ return random.choice(noun_pls)
+def noun_sg():
+ with open('nouns_Nom=Acc_Sg.txt', 'r',encoding = 'utf-8') as f:
+ noun_sgs = []
+ for line in f:
+ newword = line.strip()
+ noun_sgs.append(newword)
+ return random.choice(noun_sgs)
+def verb():
+ with open('verbs_Pl.txt', 'r',encoding = 'utf-8') as f:
+ verbs = []
+ for line in f:
+ newword = line.strip()
+ verbs.append(newword)
+ return random.choice(verbs)
+def adverb():
+ with open('adverbs.txt', 'r',encoding = 'utf-8') as f:
+ adverbs = []
+ for line in f:
+ newword = line.strip()
+ adverbs.append(newword)
+ return random.choice(adverbs)
+def punctuation():
+ marks = ['.', '!', '...']
+ return random.choice(marks)
+def type1():
+ return imperative() + ' ' + noun_acc() + punctuation()
+def type2():
+ return noun_pl() + ' ' + verb() + punctuation()
+def type3():
+ return imperative() + ' ' + ins_phrase() + punctuation()
+def type4():
+ return noun_pl() + ' ' + verb() + ' ' + noun_pl() + punctuation()
+def type5():
+ return noun_pl() + ' ' + verb() + ' ' + noun_sg() + punctuation()
+def type6():
+ return ins_phrase() + ' ' + imperative() + ' ' + noun_sg() + punctuation()
+def type7():
+ return imperative() + ' ' + noun_acc() + ' ' + adverb() + punctuation()
+def tanka(i):
+ line =''
+ if (i == 1) or (i == 3):
+ line = random.choice([1,2,3])
+ if line == 1:
+ line = type1()
+ if line == 2:
+ line = type2()
+ if line == 3:
+ line = type3()
+ else:
+ line = random.choice([4,5,6,7])
+ if line == 4:
+ line = type4()
+ if line == 5:
+ line = type5()
+ if line == 6:
+ line = type6()
+ if line == 7:
+ line = type7()
+ return line
+def printing():
+ for i in range(1,6):
+ print(tanka(i))
+a = printing()
+
+def open_text(text):
+ with open(text, 'r', encoding ='utf-8') as f:
+ text = f.read().lower()
+ words = text.split()
+ return words
+def percent(words, number):
+ i,j = 0,0
+ for item in words:
+ if item[0:2] =='un':
+ i+=1
+ if len(item) > number:
+ j +=1
+ if i != 0:
+ print('the number of words:', i)
+ return str(round(j / i * 100)) + '%'
+ else:
+ return 'no matching words were found'
+def questions():
+ text = input(' Please, enter the name of the text')
+ number = int(input(' Please, enter the lenght'))
+ words = open_text(text)
+ answer = percent(words, number)
+ return answer
+print('your result is', questions())
+n = int(input( ))
+w = 0
+i = 0
+while w <= n:
+ w = 2**i
+ i += 1
+ if w % 2 == 0 and w <= n:
+ print (w)
+import re
+def open_text():
+ with open('archi.html','r', encoding = 'utf-8') as f:
+ text = f.read()
+ return text
+def search(text):
+ m = re.search(r'title="Коды языков".*?title="ISO (\d\d\d)"',text, flags = re.DOTALL)
+ return m.group(1)
+def write(z):
+ with open('archi.txt','w', encoding = 'utf-8') as f:
+ f.write(z)
+archi = write(search(open_text()))
+
+import os
+import re
def search():
    """Report sub-directories of the CWD whose names mix Latin and Cyrillic.

    Prints ``1 dir was found`` or ``<n> dirs were found `` without a
    trailing newline; when at least one directory matched, the names follow
    as ``:name1, name2`` and the line is terminated.

    The original Latin class was ``[a-zA-z]``.  The range ``A-z`` also
    covers ``[``, ``\\``, ``]``, ``^``, ``_`` and the backtick, so a name
    such as ``_папка`` was wrongly counted as containing Latin letters.
    """
    latin = re.compile(r'[a-zA-Z]')
    cyrillic = re.compile(r'[а-яА-ЯЁё]')
    a = [
        f for f in os.listdir()
        if os.path.isdir(f) and latin.search(f) and cyrillic.search(f)
    ]
    count = len(a)
    if count == 1:
        print('1 dir was found', end='')
    else:
        print(count, 'dirs were found ', end='')
    if a:
        print(':' + ', '.join(a))
+search()
+
+count = 0
+arr = ['','','','']
+while count < 4:
+ s = input('vvedi slovo')
+ arr [ int(count)] += s
+ s = ''
+ count += 0.5
+for i in range (0,4):
+ print(arr[i])
+a = int(input('введи а'))
+b = int(input('введи b'))
+c = int(input('введи с'))
+if a / b == c:
+ print('а разделить на b равно с')
+else:
+ print('а разделить на b не равно с')
+if a ** b == c:
+ print(' а в степени b равно c')
+else:
+ print(' а в степени b не равно с')
+
+with open ('hw5.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines ()
+ list_1 = []
+ for line in lines:
+ line = line.split()
+ n = len (line)
+ list_1.append (n)
+ sum_list = 0
+ sum_line = 0
+ for elem in list_1:
+ if elem > 5:
+ sum_list += 1
+ sum_line += 1
+ else:
+ sum_list += 1
+ percent = (sum_line / sum_list) * 100
+ print (percent, '% строк содержит больше 5 слов')
+
+
+
+import re
+def opentext(text):
+ with open(text, 'r', encoding = 'utf-8') as f:
+ sentences = f.read()
+ text = re.sub('\.(\.\.)?|\?', '!', sentences)
+ list_ = text.split('!')
+ return list_
def text_format(text):
    """Return the sentences of file *text* as lists of lower-cased words.

    Args:
        text: name of the file; it is passed to ``opentext``, which returns
            the list of sentence strings.

    Returns:
        list of sentences, each a list of words with spaced dashes removed,
        surrounding punctuation stripped and letters lower-cased.
    """
    text = opentext(text)
    # Replace dashes that stand between spaces, so they do not become words.
    text1 = [re.sub('( - )|( — )|( ‒ )', ' ', i) for i in text]
    sents = [sent.split() for sent in text1]
    # NOTE(review): the original line was cut off inside the strip() literal
    # right after ';' (unterminated string, so the file could not be parsed).
    # Closing the literal there and mirroring the comprehension on the next
    # line is the minimal repair; any characters lost after ';' should be
    # added back to the set.
    sents2 = [[i.strip('.,?!":;') for i in sent] for sent in sents]
    sents3 = [[i.lower() for i in sent] for sent in sents2]
    return sents3
+def search(text):
+ sentences = text_format(text)
+ repeated = [[w for w in sent if sent.count(w) > 1] for sent in sentences]
+ return repeated
+def count(text):
+ a = search(text)
+ b = opentext(text)
+ for i in range(len(a)):
+ if a[i]:
+ print (str(b[i]) + '\n')
+ c = {w : a[i].count(w) for w in a[i]}
+ keys = c.keys()
+ for key in keys:
+ print ('{:^10}'.format(key) + '{:^10}'.format(c[key]))
+text = input('Введите название файла: ')
+count(text)
+
+
+
+
+
+
+
+import re
def opentext(text):
    """Return the words of the UTF-8 file *text* without edge punctuation.

    Args:
        text: path of the file to read.

    Returns:
        list of whitespace-separated tokens in file order, each stripped of
        leading and trailing punctuation.  Case is left untouched.  A token
        that consists only of punctuation stays in the list as ``''``, so
        neighbouring words keep their relative positions.
    """
    with open(text, 'r', encoding='utf-8') as f:
        # Splitting the whole text equals splitting line by line and joining.
        raw_words = f.read().split()
    # NOTE(review): the original strip() literal was cut off right after '$'
    # (unterminated string, so the file could not be parsed).  The literal is
    # closed at that point; restore any characters that were lost after '$'.
    return [a.strip('.,?!"":;*()%$') for a in raw_words]
def find_form():
    """Collect the distinct forms of the verb "сидеть" found in the text.

    The file name is taken from the module-level variable ``text`` and read
    through ``opentext``.

    Returns:
        list of matching words in order of first appearance, no duplicates.
        When the infinitive ``сидеть`` directly follows a form of "быть"
        matched by ``form2``, the two words are stored together as one
        analytic future form (``<aux> сидеть``).
    """
    form = 'си((жу)|д((и((шь)|м|(те?))?)|(е((ть)|(л(а|и|о)?)|(в(ш((и(й|е|х|(ми?))?)|(е((го)|(му?)|й|е)?)|(ая)|(ую))))))|(я(щ((и(й|(ми?)|х|е))|(е((го)|(му?)|й|е))|(ая)|(ую)))?)))'
    form2 = 'буд((ут?)|(е(м|(шь)|(те?))))'
    words = opentext(text)
    forms = []
    for i, word in enumerate(words):
        if re.search(form, word) is None:
            continue
        # Look back only when a previous word exists.  The original read
        # words[i-1] even for i == 0, where index -1 wraps around to the LAST
        # word of the text and could fabricate a future form.
        if word == 'сидеть' and i > 0 and re.search(form2, words[i - 1]) is not None:
            found = words[i - 1] + ' ' + word
        else:
            found = word
        if found not in forms:
            forms.append(found)
    return forms
+text = input('Введите название файла: ')
+m = find_form()
+print ('Формы глагола "сидеть", встретившиеся в тексте:')
+for i in range(len(m)):
+ print (m[i], end = '\n')
+l = []
+for i in range(8):
+ l.append (input())
+print (l[0]+l[1])
+print (l[2]+l[3])
+print (l[4]+l[5])
+print (l[6]+l[7])
+
+import os
+import re
+def list_files(path):
+ files_list = []
+ for d, dirs, files in os.walk(path):
+ for f in files:
+ path_f = os.path.join(d, f)
+ files_list.append(path_f)
+ return files_list
+def open_file(f):
+ with open(f, 'r', encoding = 'utf-8') as k:
+ text = k.readlines()
+ return text
+def count_sent(path):
+ files = list_files(path)
+ list_sent = {}
+ for f in files:
+ b = re.search('(_.*?.xhtml)', f)
+ f_name = b.group(1)
+ sent = 0
+ file_text = open_file(f)
+ for line in file_text:
+ if re.search('', line) != None:
+ sent = sent + 1
+ list_sent[f_name] = sent
+ return list_sent
+def file_format_sent(path):
+ sent = count_sent(path)
+ with open('task1.txt', 'w', encoding = 'utf-8')as k:
+ for key in sent.keys():
+ k.write(key + '\t' + str(sent[key]) + '\n')
+
+def inf(f):
+ text = open_file(f)
+ inf = {}
+ for line in text:
+ author = re.search('content="(.*?)" name="author"', line)
+ if author != None:
+ author1 = author.group(1)
+ for line in text:
+ topic = re.search('content="(.*?)" name="topic"', line)
+ if topic != None:
+ topic1 = topic.group(1)
+ inf[author1] = topic1
+ return inf
+def create_csv(path):
+ files = list_files(path)
+ with open('task2.csv', 'w', encoding = 'utf-8') as k:
+ for f in files:
+ infa = inf(f)
+ f_name = re.search('(_.*?.xhtml)', f).group(1)
+ for key in infa.keys():
+ k.write(str(f_name) + '\t' + str(key) + '\t' + str(infa[key]) + '\n')
+
+def pr_loc(f):
+ text = open_file(f)
+ bigrams = []
+ for i in range(len(text)):
+ pr = re.search('gr="PR"', text[i])
+ if pr != None:
+ prep = re.search('(.*?)', text[i]).group(1)
+ loc = re.search('"S.*?loc', text[i+1])
+ if loc != None:
+ S_loc = re.search('(.*?)', text[i+1]).group(1)
+ bigrams.append(prep + ' ' + S_loc)
+ return bigrams
+def text_without_tegs(f):
+ text = open_file(f)
+ text_w_t = ''
+ for line in text:
+ if re.search('', line) != None:
+ word = re.search('(.*?)', line).group(1)
+ prep = re.search('(.)()?', line)
+ if prep != None:
+ if prep.group(1) == '.' or prep.group(1) == '!' or prep.group(1) == '?':
+ text_w_t = text_w_t + ' ' + word + prep.group(1)+'\n'
+ else:
+ text_w_t = text_w_t + ' ' + word + prep.group(1)
+ else:
+ text_w_t = text_w_t + ' ' + word
+ return text_w_t
+
+
+
+
+
+
+
+
+
+
+
+def bigr(path):
+ files = list_files(path)
+ with open('task3.txt', 'w', encoding = 'utf-8') as k:
+ for f in files:
+ for b in pr_loc(f):
+ k.write(b + '\n')
+path = 'C:\\Users\\1\\Documents\\ниу вшэ\\КИЛИ и программирование\\python\\экзамен\\news'
+file_format_sent(path)
+create_csv(path)
+bigr(path)
+
+
+
+
+
+
+import random
+def adjective_Abl_m():
+ with open('adjective_Abl_verse1_m.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
+def adjective_Abl_f():
+ with open('adjective_Abl_verse1_f.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
+def noun_Abl_m():
+ with open('noun_Abl_verse1_m.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
+def noun_Abl_f():
+ with open('noun_Abl_verse1_f.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
+def noun_phrase():
+ with open('prepositions.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ prep = random.choice(line)
+ while prep != 'в' and prep != 'к' and prep != 'с':
+ prep = random.choice(line)
+ if prep == 'в' or prep == 'к':
+ with open('noun_verse1_prep1.txt', 'r', encoding = 'utf-8') as k:
+ nouns = k.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun1 = random.choice(noun)
+ else:
+ with open('noun_verse1_prep2.txt', 'r', encoding = 'utf-8') as k:
+ nouns = k.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun1 = random.choice(noun)
+ return prep.title() + ' ' + noun1
+def noun_Gen():
+ with open('noun_Gen_verse1.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
+def verse11():
+ return adjective_Abl_m().title() + ' ' + noun_Abl_m()
+def verse12():
+ return adjective_Abl_f().title() + ' ' + noun_Abl_f()
+def verse13():
+ return noun_phrase() + ' ' + noun_Gen()
+def participle_adj():
+ with open('participle_adjective_verse2.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
+def subject():
+ with open('subject_verse2.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
+def place():
+ with open('places_verse2.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split(', ')
+ return random.choice(line)
+def obj_f():
+ with open('adjective_obj_verse2_f.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ adj = random.choice(line)
+ with open('object_verse2_f.txt', 'r', encoding = 'utf-8') as k:
+ objects = k.readlines()
+ for obj in objects:
+ obj = obj.split()
+ obj = random.choice(obj)
+ return adj + ' ' + obj
+def obj_m():
+ with open('object_verse2_m.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ obj1 = random.choice(line)
+ with open('object_Gen_verse2_m.txt', 'r', encoding = 'utf-8') as k:
+ objects = k.readlines()
+ for obj in objects:
+ obj = obj.split()
+ obj2 = random.choice(obj)
+ with open('adjective_obj_verse2_m.txt', 'r', encoding = 'utf-8') as l:
+ adjectives = l.readlines()
+ for adjective in adjectives:
+ adjective = adjective.split()
+ adj = random.choice(adjective)
+ return adj + ' ' + obj2 + ' ' + obj1
+def verse21():
+ return participle_adj().title() + ' ' + subject() + ' ' + place() + '.'
+def verse22():
+ with open('verb_verse2.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ verb = random.choice(line)
+ return verb.title() + ' ' + obj_f()
+def verse23():
+ with open('verb_verse2.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ verb = random.choice(line)
+ return verb.title() + ' ' + obj_m()
+def verb_feel():
+ with open('verb_feelings.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
+def verse31():
+ with open('prepositions.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ prep = random.choice(line)
+ with open('base_noun_verse3.txt', 'r', encoding = 'utf-8') as k:
+ nouns = k.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ base_noun = random.choice(noun)
+ if prep == 'под' or prep == 'над':
+ if base_noun == 'мор' or base_noun == 'солнц':
+ noun = base_noun + 'ем'
+ else:
+ noun = base_noun + 'ом'
+ elif prep == 'у' or prep == 'от' or prep == 'из':
+ if base_noun == 'мор':
+ noun = base_noun + 'я'
+ else:
+ noun = base_noun + 'а'
+ elif prep == 'при' or prep == 'на':
+ noun = base_noun + 'е'
+ elif prep == 'с':
+ if base_noun == 'мор' or base_noun == 'солнц':
+ noun = base_noun + 'ем'
+ else:
+ noun = base_noun + 'ом'
+ prep = 'как с'
+ elif prep == 'в':
+ noun = base_noun + 'е'
+ prep = 'как в'
+ elif prep == 'к':
+ if base_noun == 'мор':
+ noun = base_noun + 'ю'
+ else:
+ noun = base_noun + 'у'
+ prep = 'как к'
+ else:
+ if base_noun == 'мор':
+ noun = base_noun + 'ю'
+ else:
+ noun = base_noun + 'у'
+ return verb_feel().title() + ',' + ' ' + prep + ' ' + noun
+def verse32():
+ with open('participle_verse3.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ participle = random.choice(line)
+ with open('subject_verse3.txt', 'r', encoding = 'utf-8') as k:
+ subjects = k.readlines()
+ for sub in subjects:
+ sub = sub.split()
+ subject = random.choice(sub)
+ return participle.title() + ' ' + subject + '.'
+def verse41():
+ with open('noun_verse41_1.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ noun1 = random.choice(line)
+ with open('prepositions.txt', 'r', encoding = 'utf-8') as k:
+ preps = k.readlines()
+ for preposition in preps:
+ preposition = preposition.split()
+ prep = random.choice(preposition)
+ while prep == 'в' or prep == 'к' or prep == 'с':
+ prep = random.choice(preposition)
+ if prep == 'под' or prep == 'над':
+ with open('noun_verse41_2.txt', 'r', encoding = 'utf-8') as l:
+ nouns = l.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun2 = random.choice(noun)
+ elif prep == 'у' or prep == 'от' or prep == 'из':
+ with open('noun_verse41_3.txt', 'r', encoding = 'utf-8') as l:
+ nouns = l.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun2 = random.choice(noun)
+ elif prep == 'при':
+ with open('noun_verse41_4.txt', 'r', encoding = 'utf-8') as l:
+ nouns = l.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun2 = random.choice(noun)
+ elif prep == 'на':
+ with open('noun_verse41_5.txt', 'r', encoding = 'utf-8') as l:
+ nouns = l.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun2 = random.choice(noun)
+ else:
+ with open('noun_verse41_6.txt', 'r', encoding = 'utf-8') as l:
+ nouns = l.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun2 = random.choice(noun)
+ if noun1 == 'дрожь' or noun1 == 'ночь' or noun1 == 'сталь' or noun1 == 'тень' or noun1 == 'кровь' or noun1 == 'плеть':
+ with open('verb_verse41_1.txt', 'r', encoding = 'utf-8') as l:
+ verbs = l.readlines()
+ for verb in verbs:
+ verb = verb.split()
+ verb1 = random.choice(verb)
+ else:
+ with open('verb_verse41_2.txt', 'r', encoding = 'utf-8') as l:
+ verbs = l.readlines()
+ for verb in verbs:
+ verb = verb.split()
+ verb1 = random.choice(verb)
+ return noun1.title() + ' ' + prep + ' ' + noun2 + ' ' + verb1 + '.'
+def noun42():
+ with open('object_verse42.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ return random.choice(line)
def _read_words(path):
    """Return all whitespace-separated words stored in the UTF-8 file *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return f.read().split()


def the_end_of_the_line():
    """Build the closing "<Preposition> <word>" fragment of a verse line.

    A preposition is drawn from ``prepositions.txt`` (without ``под``,
    ``у``, ``от``, ``по``, ``из``) extended by ``во``, ``со`` and ``ко``.
    The word that follows it is drawn from the file tied to that
    preposition.

    Returns:
        str: the title-cased preposition, a space, and the chosen word.

    Defects of the original that are fixed here:
      * ``line.append('во' and 'со' and 'ко')`` appended only ``'ко'``,
        because an ``and`` chain evaluates to its last operand; ``во`` and
        ``со`` could never be chosen.
      * the ``со`` branch assigned ``noun`` instead of ``noun2``, so the
        final ``return`` raised UnboundLocalError.
      * ``noun42() == ('плач' or 'крик' or ...)`` compared with ``'плач'``
        only; a membership test is used instead.
      * the ``while noun2 == ...`` re-roll loops never ended when the file
        held only the excluded word; that word is filtered out up front.
      * ``list.remove`` raised ValueError when a preposition to drop was
        missing from the file; missing entries are simply ignored now.

    Word files are read in full.  The original kept only the last line,
    which is the same thing for the one-line files it appears to expect.
    """
    excluded = {'под', 'у', 'от', 'по', 'из'}
    preps = [p for p in _read_words('prepositions.txt') if p not in excluded]
    preps.extend(['во', 'со', 'ко'])
    prep = random.choice(preps)

    if prep == 'со':
        # "со" only ever combines with this one pronoun form.
        return prep.title() + ' ' + 'мной'

    # Prepositions whose word list is used without any filtering.
    fixed_sources = {
        'во': 'noun_verse42_1.txt',
        'ко': 'noun_verse42_2.txt',
        'в': 'noun_verse42_4.txt',
        'с': 'noun_verse42_5.txt',
        'к': 'noun_verse42_6.txt',
    }
    if prep in fixed_sources:
        candidates = _read_words(fixed_sources[prep])
    else:
        # "при"/"на" share one list; every other preposition uses list 7.
        if prep in ('при', 'на'):
            source = 'noun_verse42_3.txt'
        else:
            source = 'noun_verse42_7.txt'
        listed_nouns = ('плач', 'крик', 'стон', 'зов', 'стан',
                        'взгляд', 'прах', 'плен', 'хлад')
        # NOTE(review): as in the original, noun42() draws a fresh random
        # noun here - not necessarily the one verse42() prints on the same
        # line.  Confirm whether the two are meant to be the same word.
        banned = 'ней' if noun42() in listed_nouns else 'нем'
        candidates = [w for w in _read_words(source) if w != banned]
    return prep.title() + ' ' + random.choice(candidates)
+def verse42():
+ with open('pronoun_verse4.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ pronoun = random.choice(line)
+ return verb_feel().title() + ' ' + noun42() + ' ' + pronoun + '... ' + the_end_of_the_line()
+def verse51():
+ with open('pronoun_verse5.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ pronoun = random.choice(line)
+ if pronoun == 'вся' or pronoun == 'та':
+ with open('adjective_verse5_f_4.txt', 'r', encoding = 'utf-8') as k:
+ adjectives = k.readlines()
+ for adjective in adjectives:
+ adjective = adjective.split()
+ adj = random.choice(adjective)
+ with open('noun_verse5_f.txt', 'r', encoding = 'utf-8') as l:
+ nouns = l.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun1 = random.choice(noun)
+ elif pronoun == 'весь' or pronoun == 'тот':
+ with open('adjective_verse5_m_3.txt', 'r', encoding = 'utf-8') as k:
+ adjectives = k.readlines()
+ for adjective in adjectives:
+ adjective = adjective.split()
+ adj = random.choice(adjective)
+ with open('noun_verse5_m.txt', 'r', encoding = 'utf-8') as l:
+ nouns = l.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun1 = random.choice(noun)
+ else:
+ with open('adjective_verse5_f_3.txt', 'r', encoding = 'utf-8') as k:
+ adjectives = k.readlines()
+ for adjective in adjectives:
+ adjective = adjective.split()
+ adj = random.choice(adjective)
+ with open('noun_verse5_f.txt', 'r', encoding = 'utf-8') as l:
+ nouns = l.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun1 = random.choice(noun)
+ return pronoun.title() + ' ' + adj + ' ' + noun1 + '.'
+def verse52():
+ with open('parenthesis_verse5.txt', 'r', encoding = 'utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.split()
+ parenthesis = random.choice(line)
+ with open('noun_verse52.txt', 'r', encoding = 'utf-8') as k:
+ nouns = k.readlines()
+ for noun in nouns:
+ noun = noun.split()
+ noun1 = random.choice(noun)
+ if noun1 == 'звезда' or noun1 == 'вуаль' or noun1 == 'туман':
+ with open('verb_verse52_sg.txt', 'r', encoding = 'utf-8') as l:
+ verbs = l.readlines()
+ for verb in verbs:
+ verb = verb.split()
+ verb1 = random.choice(verb)
+ else:
+ with open('verb_verse52_pl.txt', 'r', encoding = 'utf-8') as l:
+ verbs = l.readlines()
+ for verb in verbs:
+ verb = verb.split()
+ verb1 = random.choice(verb)
+ return parenthesis.title() + ' ' + noun1 + ' ' + verb1 + '?!'
+def poem():
+ variant = random.choice([1, 2, 3, 4, 5, 6])
+ if variant == 1:
+ var = random.choice([1, 2])
+ if var == 1:
+ return verse11() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52()
+ else:
+ return verse12() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52()
+ elif variant == 2:
+ var = random.choice([1, 2])
+ if var == 1:
+ return verse13() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51()
+ else:
+ return verse13() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51()
+ elif variant == 3:
+ var = random.choice([1, 2, 3, 4])
+ if var == 1:
+ return verse11() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52()
+ elif var == 2:
+ return verse12() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52()
+ elif var == 3:
+ return verse11() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52()
+ else:
+ return verse12() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52()
+ elif variant ==4:
+ return verse13() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52()
+ elif variant == 5:
+ var = random.choice([1, 2])
+ if var == 1:
+ return verse13() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52()
+ else:
+ return verse13() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52()
+ else:
+ var = random.choice([1, 2, 3, 4])
+ if var == 1:
+ return verse11() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51()
+ elif var == 2:
+ return verse12() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51()
+ elif var == 3:
+ return verse11() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51()
+ else:
+ return verse12() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51()
+print (poem())
+
+
+
+import re
+def open_file():
+ with open('Птицы.html', 'r', encoding = 'utf-8') as f:
+ text = f.read()
+ return text
+def sub_word():
+ word1 = '\\bпти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b'
+ word2 = '\\bПти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b'
+ s = re.sub(word1, 'рыб\\1', open_file())
+ m = re.sub(word2, 'Рыб\\1', s)
+ return m
+def add_file():
+ with open('Замена.html', 'w', encoding = 'utf-8') as k:
+ k.write(sub_word())
+ return k
+add_file()
+
def data(year, month, day):
    """Tell whether *day*.*month*.*year* is a real Gregorian calendar date.

    Args:
        year: year number.
        month: month number, valid range 1-12.
        day: day of the month, valid range 1 to the length of that month.

    Returns:
        ``True`` for an existing date and ``False`` for an impossible one.
        For 16.12.1998 a greeting is printed and ``None`` is returned, as
        in the original; the calling loop handles that third case.

    Defects of the original that are fixed here:
      * ``day >= 31`` rejected the 31st of every month (and made the later
        ``day == 31`` check unreachable);
      * the century rule used ``year % 1000`` instead of ``year % 400``, so
        e.g. 2000 was treated as a non-leap year;
      * months and days below 1 were accepted.
    """
    if not 1 <= month <= 12 or day < 1:
        return False
    is_leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    days_in_month = (31, 29 if is_leap else 28, 31, 30, 31, 30,
                     31, 31, 30, 31, 30, 31)
    if day > days_in_month[month - 1]:
        return False
    if day == 16 and month == 12 and year == 1998:
        print("Вы угадали день рождения разработчика! Не забудьте его поздравить :)")
        return None
    return True
+year = input ("Введите год (натуральное число): ")
+month = input ("Введите месяц (натуральное число до 12 включительно): ")
+day = input ("Введите день (натуральное число до 31 включительно): ")
+while year and month and day:
+ if data (int(year), int(month), int(day)) == True:
+ print ("Такая дата есть в календаре:)")
+ elif data (int(year), int(month), int(day)) == False:
+ print ("Простите, но такой даты нету:(")
+ else:
+ print (data (int(year), int(month), int(day)))
+ print ("Попробуем снова:)")
+ year = input ("Введите год (натуральное число): ")
+ month = input ("Введите месяц (натуральное число до 12 включительно): ")
+ day = input ("Введите день (натуральное число до 31 включительно): ")
+print ("Все!:)")
+a = int (input ())
+b = int (input ())
+c = int (input ())
+s = (a + 1) // 2 + (b + 1) // 2 + (c + 1) // 2
+print (s)
+
+
# Scratch pad of os / shutil calls. Bare expressions discard their results.
# NOTE(review): relies on `os` and `shutil` being imported elsewhere in the file.
print(os.path.abspath('.'))
print(os.getcwd())
os.path.join('texts', '1.txt')

os.path.exists('texts')
print(os.listdir('.'))

s = 'hello'
i = 1
texts = [f for f in os.listdir('.') if f.endswith('.txt')]
print(texts)
# Append s repeated i times to each .txt file, growing i per file written.
for f in os.listdir('.'):
    if not f.endswith('.txt'):
        continue
    with open(f, 'a', encoding = 'utf-8') as w:
        w.write(s * i)
    i += 1
os.mkdir('corpus1')
os.makedirs('a\\b\\long\\long')
os.rename('texts\\1.txt', 'texts\\2.txt')
os.path.isfile(r'texts\corpus1.txt')
os.path.isdir(r'texts')
shutil.copy(r'texts\2.txt', r'new_corpus\2.txt')
shutil.move('откуда', 'куда')
shutil.copytree('папка', 'папка2')
os.remove(r'new_corpus\2.txt')
shutil.rmtree('corpus')
+
+
+
def align_right(arr):
    """Print every item of *arr* right-aligned in a 40-character field."""
    for item in arr:
        print(format(item, '>40'))


arr = ['abba', 'assa', 'adda', 'affa']
align_right(arr)
+
+
+
+
def tokenize(text):
    """Split *text* on whitespace, strip edge punctuation, and lower-case each token."""
    return [chunk.strip('.,?!":;*()-— ').lower() for chunk in text.split()]


text = 'Инициатива публикации лучших дисциплин исходила в том числе от Студсовета. Чуть ранее представители Студенческого совета получили возможность использовать результаты СОП при обсуждении возникающих проблем и спорных моментов. Теперь все студенты смогут использовать опубликованную информацию — агрегированное мнение своих предшественников — при формировании собственной индивидуальной образовательной траектории.'
print(tokenize(text))
+
+
+
def tabulate(a):
    """Print each row of *a* as three 10-wide columns: left, centred, right aligned."""
    for row in a:
        print('{:<10}{:^10}{:>10}'.format(row[0], row[1], row[2]))


a = [('кошки','собаки','коровы'), ('мяу','гав','му'), (3,3,2)]
tabulate(a)
# Print the sign of an integer: 1, -1 or 0.
x = int(input())
sign = (x > 0) - (x < 0)
print(sign)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# Script 1: print the smaller of two integers (the second one on a tie).
a = int(input())
b = int(input())
print(a if a < b else b)
# Script 2: evaluate x * 55 / 100 + 33 for an integer x.
x = int(input('введите целое число x = '))
print('вы ввели число', x)
res = x * 55 / 100 + 33
print('результат вычислений x * 55 / 100 + 33 =', res)
# Script 3: area of a right triangle from its two legs.
a = int(input('введите длину первого катета a = '))
b = int(input('введите длину второго катета b = '))
S = a * b / 2
print(S)
+
+
+import re
def func1(regw, word1):
    """Ask for a word and report whether pattern *regw* matches it.

    Returns a Russian sentence naming *word1* as the lemma being tested.
    """
    word = input('Введите слово: ')
    if re.search(regw, word) is None:
        return 'Данное слово не является формой слова ' + word1
    return 'Данное слово является формой слова ' + word1


word1 = 'свобода'
regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b'
+
+
def if_any(s, regw):
    """Count the whitespace-separated tokens of *s* that match *regw*.

    The previous version rebound its accumulator to the None returned by
    list.append and concatenated an int to a str, so it always raised.

    Args:
        s: Text to scan.
        regw: Regular expression tried against every token with re.search.

    Returns:
        str: Russian report 'Слово встречается в тексте N раз'.
    """
    hits = [token for token in s.split() if re.search(regw, token) is not None]
    return 'Слово встречается в тексте ' + str(len(hits)) + ' раз'
# Demo run on a sample sentence with the module-level pattern.
s = 'Свободу попугаям!'
print(if_any(s, regw))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
+
+
+
+
+
+
+
+
+import os
+import shutil
+import re
+
def make_folders_sent(s):
    """Create a chain of nested folders, one per whitespace-separated word of *s*."""
    os.makedirs('\\'.join(s.split()))


s = input('Пожалуйста, введите предложение (без знаков препинания!) \n')
make_folders_sent(s)
+
def make_folders_num(n):
    """Create folders '1'..'n' in the current directory.

    Folder i receives i files named '1.txt'..'i.txt', each containing 'Hello!'.
    Files are now closed deterministically, and paths are built with
    os.path.join instead of a hard-coded backslash.

    Args:
        n: Number of folders to create.

    Raises:
        FileExistsError: If one of the folders already exists.
    """
    for i in range(1, n + 1):
        folder = str(i)
        os.mkdir(folder)
        for a in range(i):
            name = os.path.join(folder, str(a + 1) + '.txt')
            with open(name, 'w', encoding='utf-8') as file:
                file.write('Hello!')
# Ask for the folder count and build the numbered folders.
n = int(input('Пожалуйста, введите натуральное число \n'))
make_folders_num(n)
+
def count():
    """Print a two-column table of extensions of files in the current directory and their counts.

    The "extension" is the text after the last dot of the file name.
    """
    c = {}
    for entry in os.listdir('.'):
        if os.path.isfile(entry):
            ext = entry.split('.')[-1]
            c[ext] = c.get(ext, 0) + 1
    for key, total in c.items():
        print('{:^10}'.format(key) + '{:^10}'.format(total))


count()
# Script 1: interview the user and store the answers in information.txt.
name = input ('Введите ваше имя: ')
age = input ('Сколько вам лет? ')
colour = input ('Какой ваш любимый цвет? ')
music = input ('Кто ваш любимый музыкальный исполнитель? ')
dream = input ('Какова ваша заветная мечта? ')
with open('information.txt', 'w', encoding = 'utf-8') as f:
    f.write('Информация о соседе\n')
    f.write('\n'.join([name, age, colour, music, dream]))
# Script 2: print every whitespace-separated token of the novel as one flat list.
with open('Austen_Jane_Pride_and_Prejudice.txt', 'r', encoding = 'utf-8') as f:
    text = f.readlines()
list_ = []
for line in text:
    line = line.split()
    list_ += line
print(list_)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
+
+
+
+
+
+
+
+
+
# Script 1: print every line of freq.txt that mentions 'союз'.
with open('freq.txt', 'r', encoding = 'utf-8') as f:
    lines = f.readlines()
for line in lines:
    if 'союз' in line:
        print(line)


# Script 2: list rows tagged both 'жен' and 'ед', then print the sum of
# their last columns (printed with the 'IPM' label in script 3).
with open('freq.txt', 'r', encoding = 'utf-8') as f:
    lines = f.readlines()
a = []
for line in lines:
    line = line.split()
    if 'жен' in line and 'ед' in line:
        print(line[0], end = ', ')
        a.append(line[-1])
ipm_sum = sum(float(elem) for elem in a)
print(ipm_sum)


# Script 3: look words up interactively until an empty line is entered.
with open('freq.txt', 'r', encoding = 'utf-8') as f:
    lines = f.readlines()
word = input()
while word:
    for line in lines:
        line = line.split()
        if word in line:
            print('Морфологическая информация: ' + ' '.join(line[2:-2]))
            print('IPM = ' + line[-1])
    word = input()
+import random
+
+
# Word-guessing quiz: each line of words.txt is "<word> <hint...>".
with open('words.txt', 'r', encoding = 'utf-8') as f:
    lines = f.readlines()
random.shuffle(lines)
score = 0
for line in lines:
    line = line.strip()
    word, hint = line.split(' ', 1)
    response = input('Какое слово я загадала?\n ' + 'Подсказка: ' + hint + ' ')
    if response == word:
        print('Правильно, молодец!')
        score += 1
    else:
        print('А вот и нет, слово было ', word)
with open('scores.txt', 'w', encoding = 'utf-8') as n:
    # Use the real number of questions instead of a hard-coded 5; an empty
    # quiz file yields 0 rather than a division by zero.
    percent = score / len(lines) * 100 if lines else 0
    n.write('Вот результат\n')
    n.write(str(percent) + '%')
+
+
+import re
def func1(regw, word1):
    """Ask for a word and report whether pattern *regw* matches it.

    Returns a Russian sentence naming *word1* as the lemma being tested.
    """
    word = input('Введите слово: ')
    if re.search(regw, word) is None:
        return 'Данное слово не является формой слова ' + word1
    return 'Данное слово является формой слова ' + word1


word1 = 'свобода'
regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b'
+
+
def if_any(s, regw):
    """Count the whitespace-separated tokens of *s* that match *regw*.

    The previous version rebound its accumulator to the None returned by
    list.append and concatenated an int to a str, so it always raised.

    Args:
        s: Text to scan.
        regw: Regular expression tried against every token with re.search.

    Returns:
        str: Russian report 'Слово встречается в тексте N раз'.
    """
    hits = [token for token in s.split() if re.search(regw, token) is not None]
    return 'Слово встречается в тексте ' + str(len(hits)) + ' раз'
# Demo run on a sample sentence with the module-level pattern.
s = 'Свободу попугаям!'
print(if_any(s, regw))
+
+
+
+
+
+import re
def opentext(a):
    """Return the full UTF-8 text of the file at path *a*."""
    with open(a, 'r', encoding = 'utf-8') as f:
        return f.read()


def delete_tags():
    """Return the text of the file named by global `name` with <...> spans removed."""
    raw = opentext(name)
    return re.sub('<.*?>', '', raw, flags = re.DOTALL)


def delete_odd():
    """Collapse each whitespace run of the tag-free text to its last whitespace character."""
    return re.sub('(\\s)+', '\\1', delete_tags())


name = input('Введите название файла: ')

print(delete_odd())
# Convert minutes since midnight to hours and minutes on a 24-hour clock.
n = int(input())
hour, minute = divmod(n, 60)
if hour >= 24:
    k = hour // 24
    hour -= k * 24
print(hour, minute)
+
+
+
+import re
def opentext(a):
    """Return the full UTF-8 text of the file at path *a*."""
    with open (a, 'r', encoding = 'utf-8') as f:
        return f.read()


def find_all_links():
    """Return every match of the link pattern in the file named by global `a`."""
    # NOTE(review): the pattern looks truncated (markup probably lost) - confirm against the original.
    pattern = r'(.*?)'
    return re.findall(pattern, opentext(a))


a = input('Введите название файла: ')


def pictures():
    """Return every match of the picture-caption pattern in the file named by global `a`."""
    # NOTE(review): the pattern looks truncated (markup probably lost) - confirm against the original.
    pattern = r'(.*?) '
    return re.findall(pattern, opentext(a))


# The function name is rebound to its own result, as in the original.
pictures = pictures()
print ('Подписи к картинкам: ')
for picture in pictures:
    print (picture[2])
+
+
+
+
+
+
+
def opentext(fname):
    """Return the normalised words of a UTF-8 text file.

    Every whitespace-separated token is lower-cased and stripped of edge
    punctuation. The previous version reset its accumulator on every line,
    so it returned only the last line's words and failed on an empty file.

    Args:
        fname: Path of the file to read.

    Returns:
        list[str]: Words of the whole file in reading order.
    """
    with open(fname, 'r', encoding = 'utf-8') as f:
        text = f.readlines()
    list_ = []
    for line in text:
        for token in line.split():
            list_.append(token.lower().strip('.,?!";:"*()'))
    return list_
+
+
+
+
def first_letter(letter):
    """Ask for a file name and return its words that start with *letter*."""
    fname = input('введите название файла: ')
    return [word for word in opentext(fname) if word.startswith(letter)]
+
+
+
+
def questions():
    """Ask for a first letter and a length; return matching words longer than that length."""
    letter = input('введите первую букву: ')
    number = int(input('введите число: '))
    return [word for word in first_letter(letter) if len(word) > number]


print (questions())
# Read three integers and print their sum.
a = int(input('введите первое число '))
b = int(input('введите второе число '))
c = int(input('введите третье число '))
s = sum((a, b, c))
print(s)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import os
def drawtree():
    """Print an indented tree of a fixed Windows folder; depth is the backslash count of the path."""
    for root, dirs, files in os.walk('C:\\Users\\1\\Documents\\ниу вшэ'):
        depth = root.count('\\')
        folder = root.rsplit('\\', 1)[-1]
        print('\t' * depth + '--' + folder + '\n')
        for entry in files:
            print('\t' * (depth + 1) + entry)


drawtree()
# Greet the user by name.
name = input()
print('Hello, ' + name + '!')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
def opentext(a):
    """Return the full UTF-8 text of the file at path *a*."""
    with open(a, 'r', encoding = 'utf-8') as f:
        return f.read()


def delete_tags():
    """Return the text of the file named by global `name` with <...> spans removed."""
    raw = opentext(name)
    return re.sub('<.*?>', '', raw, flags = re.DOTALL)


def delete_odd():
    """Collapse each whitespace run of the tag-free text to its last whitespace character."""
    return re.sub('(\\s)+', '\\1', delete_tags())


name = input('Введите название файла: ')

print(delete_odd())
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import re
def opentext(a):
    """Return the full UTF-8 text of the file at path *a*."""
    with open (a, 'r', encoding = 'utf-8') as f:
        return f.read()


def find_all_links():
    """Return every match of the link pattern in the file named by global `a`."""
    # NOTE(review): the pattern looks truncated (markup probably lost) - confirm against the original.
    pattern = r'(.*?)'
    return re.findall(pattern, opentext(a))


a = input('Введите название файла: ')


def pictures():
    """Return every match of the picture-caption pattern in the file named by global `a`."""
    # NOTE(review): the pattern looks truncated (markup probably lost) - confirm against the original.
    pattern = r'(.*?) '
    return re.findall(pattern, opentext(a))


# The function name is rebound to its own result, as in the original.
pictures = pictures()
print ('Подписи к картинкам: ')
for picture in pictures:
    print (picture[2])
# Print the largest power of two not exceeding num.
# NOTE(review): indentation was lost in this copy; the print is assumed to
# follow the loop rather than sit inside it - confirm.
num = int(input())
t = 1
while 2 * t <= num:
    t *= 2
print(t)
+
+
+
def opentext(fname):
    """Return all words of a UTF-8 file, lower-cased and stripped of edge punctuation."""
    with open(fname, 'r', encoding = 'utf-8') as f:
        raw_lines = f.readlines()
    tokens = [token for line in raw_lines for token in line.split()]
    return [token.lower().strip('.,?!";:"*()') for token in tokens]
def un_forms():
    """Return the words of the file named by global `fname` that start with 'un'."""
    return [word for word in opentext(fname) if word.startswith('un')]
def quantity():
    """Return how many words with the 'un' prefix the file contains."""
    return len(un_forms())
def percentage(number):
    """Return the percentage of 'un'-words that are longer than *number* characters.

    Args:
        number: Length threshold; only strictly longer words are counted.

    Returns:
        float: Share in percent, or 0.0 when the file has no 'un'-words
        (previously a ZeroDivisionError).
    """
    words = un_forms()
    if not words:
        return 0.0
    longer = sum(1 for word in words if len(word) > number)
    return longer / len(words) * 100
# Ask for the file and the length threshold, then report both statistics.
fname = input('Введите название файла: ')
number = int(input('Введите число: '))
print('Количество слов с приставкой un- равно ', quantity())
print('Процент слов с приставкой un- длинее ', number, ' равен ', percentage(number))
+
+
+
+import os
+import shutil
+import re
def all_files():
    """Return the names of regular files in the current directory without extension.

    The part after the last dot is treated as an extension unless it is
    purely numeric or contains whitespace; in that case the full file name
    is kept. The previous version indexed the re-joined string and returned
    only its first character for such files.

    Returns:
        list[str]: One name per file, in os.listdir order.
    """
    file_names = []
    for f in os.listdir('.'):
        if not os.path.isfile(f):
            continue
        parts = f.split('.')
        suffix = parts[-1]
        if len(parts) == 1 or suffix.isdigit() or re.search(r'\s', suffix) is not None:
            name = f
        else:
            name = '.'.join(parts[:-1])
        file_names.append(name)
    return file_names
def all_dirs():
    """Return the names of the sub-directories of the current directory."""
    return [entry for entry in os.listdir('.') if os.path.isdir(entry)]
def all_without_rep():
    """Return file names followed by folder names, keeping only the first occurrence of each."""
    seen = []
    for candidate in all_files() + all_dirs():
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
def out_nice():
    """Print a header and then every unique file/folder name on its own line."""
    unique_names = all_without_rep()
    print('Список папок и файлов в текущей директории: ')
    for entry in unique_names:
        print(entry)
def cyrill_latin_symb_fold():
    """Return how many folders in the current directory mix Latin and Cyrillic letters.

    The Cyrillic class now includes ё/Ё, which lie outside the а-я / А-Я
    code-point ranges and were previously not recognised.
    """
    lat = '[a-zA-Z]'
    cyr = '[а-яА-ЯёЁ]'
    mixed = [
        name for name in all_dirs()
        if re.search(lat, name) is not None and re.search(cyr, name) is not None
    ]
    return len(mixed)
# Print the listing, then the number of mixed-alphabet folders.
out_nice()
print('Количество папок, содержащих и латинские, и кириллические символы, равно: ', cyrill_latin_symb_fold())
+
+
+
+
+
+
+import random
def file():
    """Load dictionary.csv (';'-separated) into a dict mapping column 1 to column 2."""
    with open('dictionary.csv', 'r', encoding = 'utf-8') as f:
        rows = [line.split(';') for line in f.readlines()]
    return {row[0]: row[1].strip('\n') for row in rows}
def right():
    """Return a random line of 'Верные ответы.txt'."""
    with open('Верные ответы.txt', 'r', encoding = 'utf-8') as f:
        options = f.read().split('\n')
    return random.choice(options)
def wrong():
    """Return a random line of 'Неверные ответы.txt'."""
    with open('Неверные ответы.txt', 'r', encoding = 'utf-8') as f:
        options = f.read().split('\n')
    return random.choice(options)
def zagadka(d):
    """Show a random key of *d* as a hint, read an answer, and return the feedback phrase."""
    key = random.choice(list(d.keys()))
    print ('Подсказка: ' + key + '...')
    answer = input('Введите ответ: ')
    if answer != d[key]:
        return(wrong() + ' Верный ответ ' + d[key] + '.')
    return(right())
# Game loop: play rounds for as long as the user answers "да".
d = file()
a = input('Хочешь поиграть? Введи "да" или "нет":)\n')
while a == 'да':
    print(zagadka(d))
    a = input('Хочешь сыграть еще раз?:) Введи "да" или "нет"\n')
print ('До свидания!')
# Report whether a / b equals c and whether a ** b equals c.
a = int(input())
b = int(input())
c = int(input())
quotient_matches = a / b == c
print(a, "разделить на", b, "равно" if quotient_matches else "не равно", c)
power_matches = a ** b == c
print(a, "в степени", b, "равно" if power_matches else "не равно", c)
+
+
+import os
def _files_word(count):
    """Return the Russian plural form of 'файл' that agrees with *count*."""
    if count % 10 == 1 and count % 100 != 11:
        return 'файл'
    if count % 10 in (2, 3, 4) and count % 100 not in (12, 13, 14):
        return 'файла'
    return 'файлов'


def max_dir():
    """Print the directories under the current one that directly hold the most files.

    Walks the tree rooted at the absolute current directory, counts the
    files directly inside each directory, and prints the maximum followed
    by every directory reaching it. The noun form now follows the Russian
    plural rules for all numbers (21 файл, 22 файла, 11 файлов); before,
    only 1-4 were handled.
    """
    a = {}
    for root, dirs, files in os.walk(os.path.abspath('.')):
        a[root] = len(files)
    max_v = max(a.values())
    print('Наибольшее количество файлов (' + str(max_v) + ' ' + _files_word(max_v) + ') в директориях: ')
    for key, amount in a.items():
        if amount == max_v:
            print(key)


max_dir()
+
+
+
+
+import re
def open_file(a):
    """Return the full UTF-8 text of the file at path *a*."""
    with open(a, 'r', encoding = 'utf-8') as f:
        return f.read()


def find_ISO():
    """Search the article named by global `a` for its ISO 639-3 code.

    Returns group 2 of the match, or a Russian "not specified" message.
    """
    # NOTE(review): the visible pattern has a single group while group(2) is
    # read; the pattern was probably truncated in this copy - confirm.
    reg = 'ISO 639-3(\\w{3})'
    m = re.search(reg, open_file(a), flags = re.DOTALL)
    if not m:
        return 'В статье не указано ISO 639-3'
    ISO = m.group(2)
    return ISO


def add_file():
    """Append '<article name>: <ISO result>' to Result.txt and return the (closed) file object."""
    with open('Result.txt', 'a', encoding = 'utf-8') as k:
        for piece in ('\n', a, ': ', find_ISO()):
            k.write(piece)
    return k


a = input('Введите название статьи в формате Название.html: ')
add_file()
# Print the word with i characters trimmed from both ends, for i = 1 .. len(word) - 1.
word = input ('введите слово: ')
for trim in range(1, len(word)):
    print(word[trim:-trim])
# Guessing game over dict.csv, whose lines look like "<word>:<hint>".
with open('dict.csv', 'r', encoding='utf-8') as f:
    lines = f.readlines()
a = dict()
for line in lines:
    line = line.strip('\n')
    key, value = line.split(':', 1)
    a[key] = value
for key in a:
    b = input('Угадай слово. Вот подсказка: '+a[key])
    # Allow up to len(key) retries after the first wrong guess.
    t = 0
    while b != key and t <= (len(key)-1):
        b = input('Неправильно, попробуй еще раз: ')
        t += 1
    # Congratulate only on a correct answer; the former while/else also
    # printed this when the retries were exhausted.
    if b == key:
        print('Правильно!')
+import os
+
+
+
+
def greatestway():
    """Return the depth of the deepest folder below the current directory.

    Depth is the number of path separators in the path that os.walk reports
    relative to '.', so '.' itself has depth 0. The platform separator
    (os.sep) is counted instead of a hard-coded '/', which never occurs in
    os.walk output on Windows.
    """
    return max(root.count(os.sep) for root, dirs, files in os.walk('.', topdown=False))


print(greatestway())
+import re
+import os
+
+
+
def openfile():
    """Print, for every file under .\\news2, its name and the number of '.'-separated pieces of its tag-free text."""
    for root, dirs, files in os.walk('.\\news2'):
        for f in files:
            path = os.path.join(root, f)
            with open(path, 'r', encoding='Windows-1251') as text:
                plain = re.sub('<.*?>', '', text.read())
            count = len(plain.split('.'))
            print(f, ' ', count)
    return
def meta():
    """Collect author and topic of every file under .\\news2 into .\\table.csv.

    The table is written once, header first, with one newline-terminated
    row per file. Previously the file was reopened in 'w' mode for every
    input, so only the last row survived, and rows had no line terminator.
    Nothing is written when no input files are found.
    """
    rows = []
    for root, dirs, files in os.walk('.\\news2'):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='Windows-1251') as text:
                file_text = text.read()
            # NOTE(review): both patterns are empty in this copy (markup
            # probably lost), so group(1) cannot exist - restore from the original.
            writer = re.match('', file_text).group(1)
            topic = re.match('', file_text).group(1)
            rows.append(f + ' ' + writer + ' ' + topic)
    if rows:
        with open('.\\table.csv', 'w', encoding='utf-8') as csv_f:
            csv_f.write('Файл' + ' ' + 'Автор' + ' ' + 'Тема' + '\n')
            csv_f.write('\n'.join(rows) + '\n')
    return
# Run both reports; each function returns None, which is printed as-is.
print(openfile())
print(meta())
+import re
def openf():
    """Return the lines of F.xml."""
    # NOTE(review): 'utf=8' looks like a typo for 'utf-8'; kept verbatim.
    with open('F.xml', 'r', encoding='utf=8') as f:
        return f.readlines()
def countli():
    """Write the number of lines of F.xml to result.txt and return that file's name."""
    linecount = len(openf())
    results = 'result.txt'
    with open(results, 'w', encoding='utf-8') as n:
        n.write(str(linecount))
    return results
def dicfreq():
    """Build a frequency dict from lines of F.xml containing 'lemma'; save its keys to keys.txt."""
    types = []
    for line in openf():
        l = str(line)
        if 'lemma' in l:
            # NOTE(review): the pattern is empty in this copy (markup probably
            # lost), so group(2) cannot exist - restore from the original.
            reg = re.search(r'', l)
            types.append(reg.group(2))
    freq = {}
    for item in types:
        freq[item] = freq.get(item, 0) + 1
    with open('keys.txt', 'w', encoding='utf-8') as te:
        te.write('\n'.join(freq.keys()))
    return freq


print(countli(), dicfreq())
+import re
fname = input('Введите название файла: ')


def openfile(fname):
    """Return the full UTF-8 text of the file *fname*."""
    with open(fname,'r', encoding='utf-8') as f:
        return f.read()


def sentences():
    """Split the text of global `fname` into sentences at terminal punctuation followed by whitespace."""
    stripped = openfile(fname).strip()
    return re.split('\\b[.!?\\n]+(?=\\s)', stripped)


def find8():
    """Print every word longer than 7 characters with its length, sentence by sentence."""
    template = '{} {:->10}'
    for sentence in sentences():
        cleaned = [str(w).strip('?!&(),.:;«»\n”“ ') for w in sentence.split(' ')]
        for g in cleaned:
            if len(g) > 7:
                print(template.format(g,len(g)))
    return


print(find8())
+
+import re
+import os
+import shutil
# Module-level accumulators filled by foldername().
filename = []
unique = []
name = ''


def numberinf():
    """Count entries of REALEC whose name still contains a digit after removing '.<non-digits>' parts."""
    number = 0
    for entry in os.listdir('REALEC'):
        stem = re.sub(r'\.\D+', '', str(entry))
        if re.search(r'\d', stem) is not None:
            number += 1
    return number


def foldername():
    """Return the distinct non-empty REALEC entry names with '.<non-digits>' parts removed."""
    for entry in os.listdir('REALEC'):
        filename.append(re.sub(r'\.\D+', '', str(entry)))
    for n in filename:
        if n != '' and n not in unique:
            unique.append(n)
    return unique


print(numberinf(), foldername())
+import random
# Random three-line poem generator driven by words.txt.
# Replaces 51 copy-pasted picker functions with one table; the dead
# line-stripping loop and unused `x = list()` initialisers are dropped.
with open('words.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()

# Picker names in the order of the words.txt rows they read (row 0 first).
# presumably the suffix digit is a syllable count and m/f a gender - TODO confirm
_ROW_NAMES = (
    'nm1 nf1 nm2 nf2 nm3 nf3 nm4 nf4 nm5 nf5 nm6 nf6 '
    'adjm1 adjm2 adjf2 adjm3 adjf3 adjm4 adjf4 adjm5 adjf5 adjm6 adjf6 '
    'v1 v2 v3 v4 v5 v6 '
    'partm3 partm4 partf4 partm5 partf5 partm6 partf6 '
    'conj1 conj2 '
    'numm2 numf2 numm3 numf3 numm4 numf4 numm5 numf5 '
    'adv2 adv3 adv4 adv5 adv6'
).split()


def _make_picker(index):
    """Return a zero-argument function that picks a random word from row *index*."""
    def pick():
        # The first space-separated token of the row is discarded, as before.
        return random.choice(lines[index].strip().split(' ')[1:])
    return pick


# Publish nm1(), nf1(), ... adv6() as module-level functions, as before.
for _index, _name in enumerate(_ROW_NAMES):
    globals()[_name] = _make_picker(_index)


def random_line_5_1():
    """Return a random noun phrase for the five-syllable lines."""
    sentence5_1 = [adjm4() + ' ' + nm1(), adjm3() + ' ' + nm2(), adjm2() + ' ' + nm3(), adjm4() + ' ' + nm1(),
                   adjf4() + ' ' + nf1(), adjf3() + ' ' + nf2(), adjf2() + ' ' + nf2(), adjf4() + ' ' + nf1(),
                   partm3() + ' ' + nm2(), partm4() + ' ' + nm1(), partf4() + ' ' + nf1(),
                   nm5(), nf5(),
                   numm2() + ' ' + adjm1() + ' ' + nm2(), numm2() + ' ' + adjm2() + ' ' + nm1(),
                   numm3() + ' ' + adjm1() + ' ' + nm1(), numm4() + ' ' + nm1(),
                   numf2() + ' ' + adjf2() + ' ' + nf1(), numf2() + ' ' + nf3(), numf3() + ' ' + nf2()]
    return random.choice(sentence5_1)


def random_line_7_1():
    """Return a random noun phrase for the seven-syllable line."""
    sentence7_1 = [adjm6() + ' ' + nm1(), adjm5() + ' ' + nm2(), adjm4() + ' ' + nm3(), adjm3() + ' ' + nm4(),
                   adjm2() + ' ' + nm5(), adjm1() + ' ' + nm6(),
                   adjf6() + ' ' + nf1(), adjf5() + ' ' + nf2(), adjf4() + ' ' + nf3(), adjf3() + ' ' + nf4(),
                   adjf2() + ' ' + nf5(),
                   partm6() + ' ' + nm1(), partm5() + ' ' + nm2(), partm4() + ' ' + nm3(), partm3() + ' ' + nm4(),
                   partf6() + ' ' + nf1(), partf5() + ' ' + nf2(), partf4() + ' ' + nf3()]
    return random.choice(sentence7_1)


def random_line_7_2():
    """Return a random verb phrase ending in a conjunction."""
    sentence7_2 = [v6() + ' ' + conj1(), adv2() + ' ' + v4() + ' ' + conj1(), adv3() + ' ' + v3() + ' ' + conj1()]
    return random.choice(sentence7_2)


def random_line_5_2():
    """Return a random short verb phrase."""
    sentence5_2 = [v5(), adv2() + ' ' + v3(), adv3() + ' ' + v2(), adv4() + ' ' + v1()]
    return random.choice(sentence5_2)


def random_line_7_3():
    """Return a random adverb + verb phrase."""
    sentence7_3 = [adv2() + ' ' + v5(), adv3() + ' ' + v4(), adv4() + ' ' + v3(), adv5() + ' ' + v2(),
                   adv6() + ' ' + v1()]
    return random.choice(sentence7_3)


def poem():
    """Return a random three-line poem (without the final full stop)."""
    p = [random_line_5_1() + '.\n' + random_line_7_1() + '.\n' + random_line_5_1(),
         random_line_5_1() + '\n' + random_line_7_2() + '\n' + random_line_5_2(),
         random_line_5_1() + '\n' + random_line_7_3() + '.\n' + random_line_5_1(),
         random_line_5_1() + '\n' + random_line_7_3() + ',\n' + random_line_5_2()]
    return random.choice(p)


print(poem()+'.')
fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased, stripped text of *fname* split on single spaces."""
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    return text.lower().strip().split(' ')
def count_words(fname):
    """Return the number of space-separated words in the file *fname*.

    The old loop stripped punctuation from each word with a string literal
    that is unterminated in this copy (a syntax error), then discarded the
    result; every word was counted regardless, so the count is simply the
    length of the word list.
    """
    return len(openfile(fname))
def dicff(fname):
    """Return a word -> occurrence-count dict for *fname*, keys inserted in sorted order."""
    fr = dict()
    for token in sorted(openfile(fname)):
        fr[token] = fr.get(token, 0) + 1
    return fr


print(count_words(fname), dicff(fname))
+import re
fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased, stripped text of *fname* split on single spaces."""
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    return text.lower().strip().split(' ')
def words(fname):
    """Return the words of *fname* with edge punctuation stripped.

    The strip-character literal is unterminated in this copy (it breaks off
    after '?!@'), which is a syntax error.
    NOTE(review): it is rebuilt from string.punctuation plus common
    typographic marks - confirm against the original character set.
    """
    import string  # local import: the script itself only imports re

    strip_chars = string.punctuation + '«»—…'
    return [word.strip(strip_chars) for word in openfile(fname)]
regex = r'\bоткр(ыл[аи]?|о(ют?|е(шь|т|м|те))|ыть)\b'


def formsearch(regex):
    """Return, one per line, the words of global `fname` that match *regex*."""
    found = [item for item in words(fname) if re.search(regex, str(item)) is not None]
    return '\n'.join(found)


print(formsearch(regex))
fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased, stripped text of *fname* split on single spaces."""
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    return text.lower().strip().split(' ')
def ingform(fname):
    """Return the words of *fname* that end in 'ing' once edge punctuation is stripped.

    The strip-character literal is unterminated in this copy (it breaks off
    after '?!@'), which is a syntax error.
    NOTE(review): it is rebuilt from string.punctuation plus common
    typographic marks - confirm against the original character set.
    """
    import string  # local import: the script has no import block of its own

    strip_chars = string.punctuation + '«»—…'
    stripped = (word.strip(strip_chars) for word in openfile(fname))
    return [word for word in stripped if word.endswith('ing')]
theword = input('Введите слово: ')


def searching(theword):
    """Return how many -ing forms in global `fname` equal *theword*."""
    return sum(1 for form in ingform(fname) if form == theword)


print(ingform(fname))
print(searching(theword))
+import re
fname = input('Введите название файла: ')


def open_html(fname):
    """Return the full UTF-8 text of the file *fname*."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read()


# NOTE(review): the patterns in both finders look truncated in this copy
# (markup probably lost); they are kept verbatim - restore from the original.
def find_capital(fname):
    """Return group 3 of the capital pattern (Wikidata property P36) in the page, or None."""
    text = open_html(fname)
    if re.search(r'', text) is None:
        return None
    capital = re.search(r'data-wikidata-property-id="P36"(.*?)(.*?)', text)
    if capital is None:
        return None
    return capital.group(3)


def find_country(fname):
    """Return group 1 of the country pattern in the page, or None."""
    text = open_html(fname)
    if re.search(r'', text) is None:
        return None
    country = re.search(r'>(.*?)', text)
    if country is None:
        return None
    return country.group(1)


print('Страна: ', find_country(fname), 'Столица: ', find_capital(fname))
+import re
fname = input('Введите название файла: ')


def open_html(fname):
    """Return the text of *fname* after a chain of regex clean-up passes."""
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    # NOTE(review): several patterns below are empty or near-empty in this
    # copy (markup probably lost); kept verbatim - restore from the original.
    cleanup_patterns = (u'<.*?(".*?")?.*?>', u'', u'', u'.*?')
    for pattern in cleanup_patterns:
        text = re.sub(pattern, u'', text, flags = re.U)
    return text


def changeform(fname):
    """Replace forms of 'комар' with the matching forms of 'слон' and save to results.txt."""
    swapped = open_html(fname)
    for pattern, replacement in (
        (u'комар(у|е|ы|а(х|м|ми)?|о(м|в))?', u'слон\\1'),
        (u'Комар(у|е|ы|а(х|м|ми)?|о(м|в))?', u'Слон\\1'),
    ):
        swapped = re.sub(pattern, replacement, swapped, flags = re.U)
    with open('results.txt', 'w', encoding='utf-8') as n:
        n.write(swapped)
    return 'Готово! Результаты в файле results.txt .'


print(changeform(fname))
+
+
+import os
+import re
+from math import log
# Character class of punctuation removed before tokenising.
# NOTE(review): the literal is cut off after '%' in this copy (unterminated
# string); the class is closed with '#' added as the presumed next character - confirm.
punct = r'[.,!«»?&@"$\[\]\(\):;%#]'
# Tabs and newlines are turned into spaces.
tabs = '[\t\n]'
def preprocessing(text):
    """Lower-case *text*, drop punctuation, turn tabs/newlines into spaces, and return the word list."""
    cleaned = re.sub(punct, '', text.lower())
    cleaned = re.sub(tabs, ' ', cleaned)
    return cleaned.strip().split()
def count_tf(word, text):
    """Return the term frequency of *word*: its share among the tokens of *text*.

    Raises:
        ZeroDivisionError: If *text* is empty.
    """
    return text.count(word) / len(text)


def count_df(word, texts):
    """Return the number of documents in *texts* that contain *word*."""
    return sum(1 for text in texts if word in text)


def count_idf(word, texts):
    """Return the raw inverse document frequency len(texts) / df, or 0 when df is 0.

    The logarithm is applied by count_tfidf, not here.
    """
    df = count_df(word, texts)
    if df == 0:
        return 0
    return len(texts) / df


def count_tfidf(word, text, texts):
    """Return the tf-idf weight of *word* in *text*: tf * log10(N / df).

    The previous formula multiplied log10(tf) by log10(idf), which is
    non-positive for every word and raised a math domain error when idf was 0.
    A word found in no document now scores 0.0.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    if idf <= 0:
        return 0.0
    return tf * log(idf, 10)


def keywords(text, texts):
    """Return the six highest-scoring words of *text* with their tf-idf weights.

    Args:
        text: Token list of the document being described.
        texts: Token lists of every document in the collection.

    Returns:
        dict: word -> tf-idf, inserted from the highest score down. Ties
        keep first-occurrence order. Empty for an empty *text*.
    """
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    # Highest weights are the most characteristic words, so rank descending.
    best = sorted(dic_tfidf, key=dic_tfidf.get, reverse=True)[:6]
    return {word: dic_tfidf[word] for word in best}
def main():
    """Tokenise every file under 'wikipedia' and print the extracted keywords of each."""
    texts = {}
    for root, dirs, files in os.walk('wikipedia'):
        for f in files:
            path = os.path.join(root, f)
            with open(path, 'r', encoding='utf-8') as t:
                texts[f] = preprocessing(t.read())
    raw_texts = list(texts.values())
    for title, tokens in texts.items():
        print('\nИзвлекаем ключевые слова для текста {}'.format(title))
        for key, weight in keywords(tokens, raw_texts).items():
            print(key, weight)


if __name__ == '__main__':
    main()
| |