added lab 1
This commit is contained in:
2
lab1/.gitignore
vendored
Normal file
2
lab1/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
.vscode
|
||||||
|
data/*
|
||||||
13
lab1/genrate_data.py
Normal file
13
lab1/genrate_data.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
import os

import numpy as np
import pandas as pd
|
||||||
|
|
||||||
|
# Directory the sample CSV files are written to.
OUT_PATH = "./data/"
# Number of sample files to generate.
FILE_COUNT = 5
# Number of rows in every sample file.
ROW_COUNT = int(1E5)

# Labels drawn uniformly at random for the "Letters" column.
LETTERS = np.array(["A", "B", "C", "D"])

# The data directory is git-ignored, so it is missing on a fresh checkout;
# create it up front instead of failing on the first write.
os.makedirs(OUT_PATH, exist_ok=True)

for file_num in range(FILE_COUNT):
    numbers = np.random.rand(ROW_COUNT)
    letters = LETTERS[np.random.randint(0, len(LETTERS), ROW_COUNT)]
    data = pd.DataFrame({"Letters": letters, "Numbers": numbers})

    # Hand pandas the path rather than an already opened text handle so that
    # it opens the file itself and controls newline handling.
    out_file = os.path.join(OUT_PATH, f"sample_{file_num + 1}.csv")
    data.to_csv(out_file, index=False, encoding="utf-8")
|
||||||
39
lab1/process_data.py
Normal file
39
lab1/process_data.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from concurrent.futures import ThreadPoolExecutor # for paralell processing
|
||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
|
||||||
|
IN_PATH="./data/"
|
||||||
|
|
||||||
|
def process_file(file_num, file_path):
    """Compute the per-letter median and standard deviation of one CSV file.

    Args:
        file_num: Identifier stored in the ``fileId`` column of every row.
        file_path: Path (or anything else ``pandas.read_csv`` accepts) of a
            CSV with a ``Letters`` column and a ``Numbers`` column.

    Returns:
        DataFrame with the columns ``fileId``, ``letter``, ``median`` and
        ``stdDev``; one row per distinct letter, ordered by letter.
    """
    data = pd.read_csv(file_path)

    # A single grouped pass replaces one boolean-mask scan per letter.
    # groupby sorts the group keys by default, so the rows come out ordered
    # by letter without a separate sort step.
    stats = (
        data.groupby("Letters")["Numbers"]
        .agg(median="median", stdDev="std")
        .reset_index()
        .rename(columns={"Letters": "letter"})
    )
    stats.insert(0, "fileId", file_num)
    return stats
|
||||||
|
|
||||||
|
|
||||||
|
# Take only CSV inputs, sorted by name: os.listdir returns entries in
# arbitrary order, which would make the fileId numbering differ between runs,
# and any stray non-CSV file in the directory would break read_csv.
files = sorted(
    os.path.join(IN_PATH, name)
    for name in os.listdir(IN_PATH)
    if name.endswith(".csv")
)
if not files:
    # pd.concat raises an opaque error on an empty list; fail clearly instead.
    raise SystemExit(f"no CSV files found in {IN_PATH}")

with ThreadPoolExecutor(max_workers=8) as executor:
    # Materialise the lazy map inside the with-block so that an exception
    # raised in a worker surfaces here, next to the code that started it.
    results = list(executor.map(process_file, range(len(files)), files))

result = pd.concat(results, ignore_index=True)
# sort_values returns a new frame; the result has to be assigned back,
# otherwise the sort has no effect.
result = result.sort_values(by=["fileId", "letter"], ignore_index=True)
print(result)

# For every letter: median and standard deviation of the per-file medians.
for letter, medians in result.groupby("letter")["median"]:
    median = medians.median()
    standard_deviation = medians.std()
    print(f"{letter}: median:{float(median)}, std:{float(standard_deviation)}")
|
||||||
6
lab1/readme.md
Normal file
6
lab1/readme.md
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
Аээ, потрачено 20 минут
|
||||||
|
изначально полез делать, чтобы функция возвращала JSON,
|
||||||
|
но стало впадлу его соединять и грузить
|
||||||
|
хорошее задание, мне всё понравилось
|
||||||
|
|
||||||
|
впервые после многолетнего перерыва работаю с pandas
|
||||||
Reference in New Issue
Block a user