import streamlit as st
import time
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager
from datetime import datetime
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import requests
import networkx as nx
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
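# Third-party packages assumed to be installed (pip names):
#   streamlit, selenium, webdriver-manager, beautifulsoup4, pandas, matplotlib,
#   requests, networkx, scikit-learn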
def make_graph():
    # Undirected graph of the Black family tree (Harry Potter)
    G = nx.Graph()
    G.add_edges_from([('Nimfadora T', 'Tad T'),
                      ('Andromeda B', 'Nimfadora T'),
                      ('Andromeda B', 'Tad T'),
                      ('Andromeda B', 'Kingus B'),
                      ('Druela R', 'Kingus B'),
                      ('Andromeda B', 'Druela R'),
                      ('Narcisa B', 'Druela R'),
                      ('Narcisa B', 'Kingus B'),
                      ('Lucius M', 'Narcisa B'),
                      ('Draco M', 'Lucius M'),
                      ('Draco M', 'Narcisa B'),
                      ('Draco M', 'Astoria G'),
                      ('Scorpius M', 'Astoria G'),
                      ('Scorpius M', 'Draco M'),
                      ('Rimus L', 'Nimfadora T'),
                      ('Ted L', 'Rimus L'),
                      ('Ted L', 'Nimfadora T')])
    # Draw the graph
    fig, ax = plt.subplots()
    pos = nx.spring_layout(G)
    nx.draw_networkx(G, pos, with_labels=True, node_color='lightblue', node_size=500,
                     edge_color='gray', width=2, alpha=0.7, ax=ax)
    st.pyplot(fig)
def linear_regression():
    df = pd.read_csv('imdb_top_1000.csv')
    # Strip the " min" suffix from the runtime and make both columns numeric
    df['Runtime'] = df['Runtime'].astype(str).str.replace(r'\D', '', regex=True).astype(float)
    df['IMDB_Rating'] = df['IMDB_Rating'].astype(float)
    X = df['Runtime'].values.reshape(-1, 1)
    y = df['IMDB_Rating'].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return mse
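# Illustrative, assumed layout of imdb_top_1000.csv: only the two columns used
# above matter here, 'Runtime' stored as text like "142 min" and 'IMDB_Rating'
# stored as a number like 9.3.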
def createDriver(url):
    # Open the page in headless Firefox and return it parsed with BeautifulSoup
    firefoxOptions = Options()
    firefoxOptions.add_argument("--headless")
    service = Service(GeckoDriverManager().install())
    driver = webdriver.Firefox(
        options=firefoxOptions,
        service=service,
    )
    driver.get(url)
    time.sleep(2)
    main_page = driver.page_source
    driver.quit()
    soup = BeautifulSoup(main_page, 'html.parser')
    return soup
def scrape_weather_data(soup):
    # Row 244 of the pogodaiklimat table holds the 2022 monthly temperatures
    # for Moscow; row 0 holds the month headers. The trailing cell of each
    # row is dropped.
    data = soup.find(class_="chronicle-table").find('tbody').find_all('tr')
    data_value = data[244].find_all('nobr')
    data_month = data[0].find_all('td')
    temp = []
    temp_month = []
    for i in data_value:
        temp.append(float(i.text))
    for j in range(len(data_month)):
        temp_month.append(data_month[j].text)
    temp_month.pop()
    temp.pop()
    return temp_month, temp
def get_weather_data(station, start_date, end_date):
    # Pick the time zone for the selected city; the Meteostat station id is
    # fixed to "10637" in the query below.
    if station == "Barnaul":
        tz = "Asia/Barnaul"
    elif station == "Moscow":
        tz = "Europe/Moscow"
    else:
        tz = "Europe/Berlin"
    url = "https://meteostat.p.rapidapi.com/stations/hourly"
    querystring = {"station": "10637", "start": start_date, "end": end_date, "tz": tz}
    headers = {
        "X-RapidAPI-Key": "9c8eb62f1fmsh82eba345d265b05p1541b2jsna3309cd23406",
        "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
    }
    response = requests.get(url, headers=headers, params=querystring)
    data = response.json()
    data_mas = []
    for j in data['data']:
        data_date = j['time']
        wind_speed = str(j['wspd']) + " km/h"
        precipitation = str(j['prcp']) + " mm"
        temperature = str(j['temp']) + " °C"
        data_mas.append([data_date, temperature, wind_speed, precipitation])
    return data_mas
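# Example of one row produced above, assuming a typical Meteostat hourly
# response (values are illustrative only):
#   ['2022-06-01 00:00:00', '14.2 °C', '7.6 km/h', '0.0 mm']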
def process_data(data):
    df = pd.DataFrame(data)
    df = df.rename(columns={0: 'date_time', 1: 'temperature', 2: 'wind_speed', 3: 'precipitation'})
    # Convert the textual values ("-3.4 °C", "7.6 km/h", "0.0 mm") back to numbers
    df["temperature"] = df["temperature"].str.extract(r"(-?\d+\.?\d*)", expand=False).astype(float)
    df["precipitation"] = df["precipitation"].str.extract(r"(-?\d+\.?\d*)", expand=False).astype(float)
    df["wind_speed"] = df["wind_speed"].str.extract(r"(-?\d+\.?\d*)", expand=False).astype(float)
    df = df.drop_duplicates()
    df = df.fillna(0)
    return df
def analyze_data(df):
    # Compute basic statistics of the temperature series
    mean_temperature = round(df["temperature"].mean(), 2)
    median_temperature = round(df["temperature"].median(), 2)
    std_temperature = round(df["temperature"].std(), 2)
    results = {
        "mean_temperature": mean_temperature,
        "median_temperature": median_temperature,
        "std_temperature": std_temperature
    }
    return results
def visualize_data_api(df):
    fig, ax = plt.subplots()
    ax.plot(df['date_time'], df['temperature'])
    plt.xticks(rotation=90)
    ax.set_xlabel('Date')
    ax.set_ylabel('Temperature')
    ax.set_title('Temperature Over Time')
    fig.set_size_inches(20, 15)
    st.pyplot(fig)
def visualize_data_parsing(mas_month, math_temp):
    fig, ax = plt.subplots()
    ax.plot(mas_month, math_temp)
    plt.xticks(rotation=90)
    ax.set_xlabel('Month')
    ax.set_ylabel('Temperature')
    ax.set_title('Temperature per year 2022 in Moscow')
    fig.set_size_inches(10, 6)
    st.pyplot(fig)
def save_to_database(dateNow, timeNow, station, start_date, end_date):
    conn = sqlite3.connect('statistic.db')
    sql = conn.cursor()
    sql.execute("""INSERT INTO statistic VALUES (?, ?, ?, ?, ?)""", (dateNow, timeNow, station, start_date, end_date))
    conn.commit()
    conn.close()
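# The statistic table is assumed to already exist; a minimal sketch of one
# possible schema that matches the five values inserted above (date, time,
# city, start of the range, end of the range):
def init_database():
    conn = sqlite3.connect('statistic.db')
    conn.execute("""CREATE TABLE IF NOT EXISTS statistic (
        date TEXT,
        time TEXT,
        station TEXT,
        start_date TEXT,
        end_date TEXT
    )""")
    conn.commit()
    conn.close()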
def view_dataBase():
    conn = sqlite3.connect('statistic.db')
    df = pd.read_sql_query("SELECT * from statistic", conn)
    conn.close()
    return df
# Project demo with Streamlit
def streamlit_demo():
    st.title("A few useful things!")
    st.title("Black family tree graph from Harry Potter:")
    make_graph()
    st.title('Rating depends on the length of the film:')
    mse_error = linear_regression()
    st.write(f'Mean Squared Error: {mse_error}')
    st.title("Weather Analysis")
    # Button that scrapes the 2022 Moscow temperatures
    temperature_moscow2022_button = st.button("Show temperature in Moscow for 2022")
    if temperature_moscow2022_button:
        url = "http://www.pogodaiklimat.ru/history/27612.htm"
        soup = createDriver(url)
        scraped_month, scraped_temp = scrape_weather_data(soup)
        visualize_data_parsing(scraped_month, scraped_temp)
    # Controls for choosing the city and the date range
    city = st.selectbox("Select City", ["Moscow", "Berlin", "Barnaul"])
    start_date = st.date_input("Select Start Date")
    end_date = st.date_input("Select End Date")
    # Button that fetches the weather data through the API
    temperature_period_button = st.button("Submit")
    if temperature_period_button:
        now = datetime.now()
        timeNow = now.strftime("%H:%M:%S")
        dateNow = now.date()
        save_to_database(dateNow, timeNow, city, start_date, end_date)
        # Fetch the weather data for the selected city and date range
        weather_data = get_weather_data(city, start_date, end_date)
        processed_data = process_data(weather_data)
        # Analyse the data
        analyzed_data = analyze_data(processed_data)
        # Visualise the data
        visualize_data_api(processed_data)
        st.title("Data analysis")
        for key, value in analyzed_data.items():
            st.write(key, value)
    # Button that shows the stored "Submit" visit statistics
    statistic_button = st.button("View visit statistics")
    if statistic_button:
        df = view_dataBase()
        st.write(df)
def main():
    streamlit_demo()

if __name__ == '__main__':
    main()