import time
import sqlite3
from datetime import datetime

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import requests
import networkx as nx
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

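# External assets this app assumes (none ship with the code):
#   - imdb_top_1000.csv next to this file (presumably the Kaggle "IMDB Top 1000" dataset)
#   - a local Firefox install for the Selenium scrape
#   - network access to pogodaiklimat.ru and the Meteostat endpoint on RapidAPI
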
def make_graph():
    # Undirected Black family tree (the original created a DiGraph first and
    # immediately overwrote it with nx.Graph, so only the latter is kept).
    G = nx.Graph()
    G.add_edges_from([
        ('Nimfadora T', 'Tad T'),
        ('Andromeda B', 'Nimfadora T'),
        ('Andromeda B', 'Tad T'),
        ('Andromeda B', 'Kingus B'),
        ('Druela R', 'Kingus B'),
        ('Andromeda B', 'Druela R'),
        ('Narcisa B', 'Druela R'),
        ('Narcisa B', 'Kingus B'),
        ('Lucius M', 'Narcisa B'),
        ('Draco M', 'Lucius M'),
        ('Draco M', 'Narcisa B'),
        ('Draco M', 'Astoria G'),
        ('Scorpius M', 'Astoria G'),
        ('Scorpius M', 'Draco M'),
        ('Rimus L', 'Nimfadora T'),
        ('Ted L', 'Rimus L'),
        ('Ted L', 'Nimfadora T'),
    ])

    fig, ax = plt.subplots()
    pos = nx.spring_layout(G)
    nx.draw_networkx(G, pos, with_labels=True, node_color='lightblue',
                     node_size=500, edge_color='gray', width=2, alpha=0.7, ax=ax)
    st.pyplot(fig)

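# Note: spring_layout is randomized, so the tree is drawn differently on every
# Streamlit rerun. A minimal tweak if a stable picture is wanted:
#
#     pos = nx.spring_layout(G, seed=42)
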
def linear_regression():
    df = pd.read_csv('imdb_top_1000.csv')
    # 'Runtime' comes as e.g. "142 min": keep the digits and convert to float.
    df['Runtime'] = df['Runtime'].astype(str).str.extract(r'(\d+)', expand=False).astype(float)
    # 'IMDB_Rating' is already numeric (e.g. 8.8). The original stripped all
    # non-digits first, which silently turned 8.8 into 88.
    df['IMDB_Rating'] = df['IMDB_Rating'].astype(float)
    X = df['Runtime'].values.reshape(-1, 1)
    y = df['IMDB_Rating'].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    # The print() that used to follow this return was unreachable and is gone.
    return mse

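# A minimal sketch of using the function outside Streamlit, assuming the same
# CSV is present in the working directory:
#
#     mse = linear_regression()
#     print(f'Mean Squared Error: {mse:.3f}')
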
def create_driver(url):
    # Headless Firefox: the scrape runs without opening a browser window.
    firefox_options = Options()
    firefox_options.add_argument("--headless")
    service = Service(GeckoDriverManager().install())
    driver = webdriver.Firefox(options=firefox_options, service=service)
    driver.get(url)
    time.sleep(2)  # crude wait for the page to finish rendering
    main_page = driver.page_source
    driver.quit()  # the original leaked the browser process here
    soup = BeautifulSoup(main_page, 'html.parser')
    return soup

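# GeckoDriverManager().install() downloads geckodriver on first use and, by
# default, caches it locally (under ~/.wdm), so later runs skip the download.
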
def scrape_weather_data(soup):
    rows = soup.find(class_="chronicle-table").find('tbody').find_all('tr')
    # Row 0 holds the month headers; row 244 is the row whose monthly
    # temperatures this app plots.
    data_value = rows[244].find_all('nobr')
    data_month = rows[0].find_all('td')
    temp = []
    temp_month = []
    for cell in data_value:
        temp.append(float(cell.text))
    for month_cell in data_month:
        temp_month.append(month_cell.text)
    # Drop the trailing column (presumably a yearly summary) so that months
    # and values line up.
    temp_month.pop()
    temp.pop()
    return temp_month, temp

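# Caveat: the hard-coded row index 244 is tied to the current shape of the
# pogodaiklimat.ru table (it appears to be the 2022 row for station 27612);
# if the site adds rows, this index has to be updated by hand.
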
def get_weather_data(station, start_date, end_date):
    # The city selection only switches the reporting timezone; the Meteostat
    # station ID itself is hard-coded to 10637 below.
    if station == "Barnaul":
        tz = "Asia/Barnaul"
    elif station == "Moscow":
        tz = "Europe/Moscow"
    else:
        tz = "Europe/Berlin"
    url = "https://meteostat.p.rapidapi.com/stations/hourly"
    querystring = {"station": "10637", "start": start_date, "end": end_date, "tz": tz}
    headers = {
        # NOTE: in real use the key should come from an environment variable,
        # not be committed to source control.
        "X-RapidAPI-Key": "9c8eb62f1fmsh82eba345d265b05p1541b2jsna3309cd23406",
        "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
    }
    response = requests.get(url, headers=headers, params=querystring)
    data = response.json()
    data_mas = []
    for row in data['data']:
        date_time = row['time']
        # 'wspd' is Meteostat's wind speed field; the original read 'dwpt'
        # (dew point) here by mistake while labeling it km/h.
        wind_speed = str(row['wspd']) + " km/h"
        precipitation = str(row['prcp']) + " mm"
        temperature = str(row['temp']) + " °C"
        data_mas.append([date_time, temperature, wind_speed, precipitation])
    return data_mas

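# For reference, each entry of data['data'] is an hourly record shaped roughly
# like the following (field names as used above; see the Meteostat docs for
# the full set):
#
#     {"time": "...", "temp": ..., "prcp": ..., "wspd": ..., ...}
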
def process_data(data):
    df = pd.DataFrame(data)
    # Column 3 is precipitation in mm (the original mislabeled it 'humidity').
    df = df.rename(columns={0: 'date_time', 1: 'temperature', 2: 'wind_speed', 3: 'precipitation'})

    # Strip the units back off. The pattern keeps the sign and the decimals;
    # the original r"(\d+)" turned e.g. -5.3 °C into 5.
    df["temperature"] = df["temperature"].str.extract(r"(-?\d+\.?\d*)", expand=False).astype(float)
    df["precipitation"] = df["precipitation"].str.extract(r"(\d+\.?\d*)", expand=False).astype(float)
    df["wind_speed"] = df["wind_speed"].str.extract(r"(\d+\.?\d*)", expand=False).astype(float)
    df = df.drop_duplicates()
    df = df.fillna(0)
    return df

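# A quick sanity check one might run, assuming a short date range (hypothetical
# dates; any "YYYY-MM-DD" pair works):
#
#     df = process_data(get_weather_data("Moscow", "2022-01-01", "2022-01-02"))
#     print(df.dtypes)  # temperature / wind_speed / precipitation -> float64
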
def analyze_data(df):
    mean_temperature = round(df["temperature"].mean(), 2)
    median_temperature = round(df["temperature"].median(), 2)
    std_temperature = round(df["temperature"].std(), 2)
    results = {
        "mean_temperature": mean_temperature,
        "median_temperature": median_temperature,
        "std_temperature": std_temperature
    }
    return results

def visualize_data_api(df):
    fig, ax = plt.subplots()
    ax.plot(df['date_time'], df['temperature'])
    plt.xticks(rotation=90)
    ax.set_xlabel('Date')
    ax.set_ylabel('Temperature, °C')
    ax.set_title('Temperature Over Time')
    fig.set_size_inches(20, 15)
    st.pyplot(fig)

def visualize_data_parsing(mas_month, math_temp):
    fig, ax = plt.subplots()
    ax.plot(mas_month, math_temp)
    plt.xticks(rotation=90)
    ax.set_xlabel('Month')
    ax.set_ylabel('Temperature, °C')
    ax.set_title('Monthly temperatures in Moscow, 2022')
    fig.set_size_inches(10, 6)
    st.pyplot(fig)

def save_to_database(dateNow, timeNow, station, start_date, end_date):
    conn = sqlite3.connect('statistic.db')
    sql = conn.cursor()
    # Create the table on first run so the INSERT below cannot fail.
    sql.execute("""CREATE TABLE IF NOT EXISTS statistic
                   (date TEXT, time TEXT, station TEXT, start_date TEXT, end_date TEXT)""")
    # str() around the date values avoids relying on sqlite3's implicit
    # date adapters, which are deprecated in recent Python versions.
    sql.execute("""INSERT INTO statistic VALUES (?, ?, ?, ?, ?)""",
                (str(dateNow), timeNow, station, str(start_date), str(end_date)))
    conn.commit()
    conn.close()

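# A sketch of inspecting the visit log directly, assuming the same DB file:
#
#     conn = sqlite3.connect('statistic.db')
#     print(conn.execute("SELECT * FROM statistic ORDER BY rowid DESC LIMIT 5").fetchall())
#     conn.close()
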
def view_database():
    conn = sqlite3.connect('statistic.db')
    df = pd.read_sql_query("SELECT * FROM statistic", conn)
    conn.close()  # the original left the connection open
    return df

def streamlit_demo():
    st.title("A few useful things!")
    st.title("Black family tree graph from Harry Potter:")
    make_graph()
    st.title("How rating depends on film length:")
    mse_error = linear_regression()
    st.write(f'Mean Squared Error: {mse_error}')
    st.title("Weather Analysis")
    temperature_moscow2022_button = st.button("Show temperature in Moscow for 2022")
    if temperature_moscow2022_button:
        url = "http://www.pogodaiklimat.ru/history/27612.htm"
        soup = create_driver(url)
        scraped_month, scraped_temp = scrape_weather_data(soup)
        visualize_data_parsing(scraped_month, scraped_temp)

    city = st.selectbox("Select City", ["Moscow", "Berlin", "Barnaul"])
    start_date = st.date_input("Select Start Date")
    end_date = st.date_input("Select End Date")
    temperature_period_button = st.button("Submit")
    if temperature_period_button:
        # Log the visit before fetching and plotting the data.
        now = datetime.now()
        timeNow = now.strftime("%H:%M:%S")
        dateNow = now.date()
        save_to_database(dateNow, timeNow, city, start_date, end_date)

        weather_data = get_weather_data(city, start_date, end_date)
        processed_data = process_data(weather_data)
        analyzed_data = analyze_data(processed_data)
        visualize_data_api(processed_data)
        st.title("Data analysis")
        for key, value in analyzed_data.items():
            st.write(key, value)
    statistic_button = st.button("View visit statistics")
    if statistic_button:
        df = view_database()
        st.write(df)

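# Launch with:  streamlit run <this file>.py
# (running it with plain `python` would not start the web UI)
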
def main():
    streamlit_demo()


if __name__ == '__main__':
    main()