{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3d5d52d1-4874-44b5-b532-ef03da47644a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from rdkit import Chem\n",
    "from rdkit.Chem import Descriptors, rdMolDescriptors, Crippen, Lipinski\n",
    "from tqdm import tqdm\n",
    "import warnings\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.model_selection import train_test_split\n",
    "import random\n",
    "from concurrent.futures import ProcessPoolExecutor\n",
    "import multiprocessing\n",
    "\n",
    "\n",
    "def analyze_polymer_features_rdkit(smiles):\n",
    "    \"\"\"Compute a flat dict of RDKit descriptors and SMILES-string statistics.\n",
    "\n",
    "    Parameters:\n",
    "        smiles: SMILES string of a single structure.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping feature name to value, or None when `smiles` is not a\n",
    "        string or RDKit cannot parse it. Keys are inserted in a fixed order,\n",
    "        which becomes the column order of the frame built by add_features().\n",
    "    \"\"\"\n",
    "    # Missing values read from a CSV arrive as float NaN, not str; report them\n",
    "    # the same way as an unparsable SMILES so one bad row cannot abort a batch.\n",
    "    if not isinstance(smiles, str):\n",
    "        return None\n",
    "\n",
    "    mol = Chem.MolFromSmiles(smiles)\n",
    "    if mol is None:\n",
    "        return None\n",
    "\n",
    "    def ratio(count, total):\n",
    "        # Guarded division: 0 when there is nothing to divide by.\n",
    "        return count / total if total > 0 else 0\n",
    "\n",
    "    features = {\n",
    "        # Basic molecular properties\n",
    "        'mol_weight': Descriptors.MolWt(mol),\n",
    "        'exact_mol_weight': Descriptors.ExactMolWt(mol),\n",
    "        'num_heavy_atoms': mol.GetNumHeavyAtoms(),\n",
    "        'num_atoms': mol.GetNumAtoms(),\n",
    "        'num_bonds': mol.GetNumBonds(),\n",
    "        # Hydrogen bonding features\n",
    "        'num_hbond_donors': Descriptors.NumHDonors(mol),\n",
    "        'num_hbond_acceptors': Descriptors.NumHAcceptors(mol),\n",
    "        'num_heteroatoms': Descriptors.NumHeteroatoms(mol),\n",
    "        # Structural complexity\n",
    "        'num_rotatable_bonds': Descriptors.NumRotatableBonds(mol),\n",
    "        'num_saturated_rings': Descriptors.NumSaturatedRings(mol),\n",
    "        'num_aromatic_rings': Descriptors.NumAromaticRings(mol),\n",
    "        'num_aliphatic_rings': Descriptors.NumAliphaticRings(mol),\n",
    "        'ring_count': Descriptors.RingCount(mol),\n",
    "        'fraction_csp3': Descriptors.FractionCSP3(mol),\n",
    "        # Surface area and polarity\n",
    "        # NOTE(review): 'polar_surface_area' looks like a duplicate of 'tpsa' - confirm.\n",
    "        'tpsa': Descriptors.TPSA(mol),\n",
    "        'polar_surface_area': rdMolDescriptors.CalcTPSA(mol),\n",
    "        # Lipophilicity and solubility\n",
    "        # NOTE(review): 'crippen_logp' looks like a duplicate of 'logp' - confirm.\n",
    "        'logp': Descriptors.MolLogP(mol),\n",
    "        'crippen_logp': Crippen.MolLogP(mol),\n",
    "        'crippen_mr': Crippen.MolMR(mol),  # Molar refractivity\n",
    "        # Flexibility and rigidity\n",
    "        'kappa1': Descriptors.Kappa1(mol),  # Molecular shape index\n",
    "        'kappa2': Descriptors.Kappa2(mol),\n",
    "        'kappa3': Descriptors.Kappa3(mol),\n",
    "        'chi0v': Descriptors.Chi0v(mol),  # Connectivity indices\n",
    "        'chi1v': Descriptors.Chi1v(mol),\n",
    "        'chi2v': Descriptors.Chi2v(mol),\n",
    "        # Electronic properties\n",
    "        'balaban_j': Descriptors.BalabanJ(mol),\n",
    "        'bertz_ct': Descriptors.BertzCT(mol),  # Complexity index\n",
    "        # Polymer-specific features\n",
    "        'num_radical_electrons': Descriptors.NumRadicalElectrons(mol),\n",
    "        'num_valence_electrons': Descriptors.NumValenceElectrons(mol),\n",
    "    }\n",
    "    num_atoms = features['num_atoms']\n",
    "    num_bonds = features['num_bonds']\n",
    "\n",
    "    # Atom type counts, reported for a fixed list of elements\n",
    "    atom_counts = {}\n",
    "    for atom in mol.GetAtoms():\n",
    "        symbol = atom.GetSymbol()\n",
    "        atom_counts[symbol] = atom_counts.get(symbol, 0) + 1\n",
    "    for element in ['C', 'N', 'O', 'S', 'P', 'F', 'Cl', 'Br', 'I']:\n",
    "        count = atom_counts.get(element, 0)\n",
    "        features[f'count_{element}'] = count\n",
    "        features[f'ratio_{element}'] = ratio(count, num_atoms)\n",
    "\n",
    "    # Bond type analysis (bond types outside these four are not counted)\n",
    "    bond_types = {'SINGLE': 0, 'DOUBLE': 0, 'TRIPLE': 0, 'AROMATIC': 0}\n",
    "    for bond in mol.GetBonds():\n",
    "        bond_type = str(bond.GetBondType())\n",
    "        if bond_type in bond_types:\n",
    "            bond_types[bond_type] += 1\n",
    "    for bond_type, count in bond_types.items():\n",
    "        key = bond_type.lower()\n",
    "        features[f'num_{key}_bonds'] = count\n",
    "        features[f'ratio_{key}_bonds'] = ratio(count, num_bonds)\n",
    "\n",
    "    # Hybridization analysis\n",
    "    # NOTE: despite the '_carbons' suffix these counters run over every atom,\n",
    "    # not only carbon. The column names are kept because later cells select\n",
    "    # features by these exact names.\n",
    "    hybridization_counts = {'SP': 0, 'SP2': 0, 'SP3': 0, 'SP3D': 0, 'SP3D2': 0}\n",
    "    for atom in mol.GetAtoms():\n",
    "        hyb = str(atom.GetHybridization())\n",
    "        if hyb in hybridization_counts:\n",
    "            hybridization_counts[hyb] += 1\n",
    "    for hyb_type, count in hybridization_counts.items():\n",
    "        key = hyb_type.lower()\n",
    "        features[f'num_{key}_carbons'] = count\n",
    "        features[f'ratio_{key}_carbons'] = ratio(count, num_atoms)\n",
    "\n",
    "    # Formal charge analysis\n",
    "    formal_charges = [atom.GetFormalCharge() for atom in mol.GetAtoms()]\n",
    "    features['total_formal_charge'] = sum(formal_charges)\n",
    "    features['abs_total_formal_charge'] = sum(abs(charge) for charge in formal_charges)\n",
    "    features['max_formal_charge'] = max(formal_charges) if formal_charges else 0\n",
    "    features['min_formal_charge'] = min(formal_charges) if formal_charges else 0\n",
    "\n",
    "    # Aromaticity features\n",
    "    aromatic_atoms = sum(1 for atom in mol.GetAtoms() if atom.GetIsAromatic())\n",
    "    features['num_aromatic_atoms'] = aromatic_atoms\n",
    "    features['aromatic_ratio'] = ratio(aromatic_atoms, num_atoms)\n",
    "\n",
    "    # Ring size analysis (all ring features are 0 for acyclic structures)\n",
    "    ring_sizes = [len(ring) for ring in mol.GetRingInfo().AtomRings()]\n",
    "    if ring_sizes:\n",
    "        features['avg_ring_size'] = sum(ring_sizes) / len(ring_sizes)\n",
    "        features['max_ring_size'] = max(ring_sizes)\n",
    "        features['min_ring_size'] = min(ring_sizes)\n",
    "    else:\n",
    "        features['avg_ring_size'] = 0\n",
    "        features['max_ring_size'] = 0\n",
    "        features['min_ring_size'] = 0\n",
    "    for size in range(3, 8):\n",
    "        features[f'num_{size}_rings'] = ring_sizes.count(size)\n",
    "    features['num_large_rings'] = sum(1 for size in ring_sizes if size > 7)\n",
    "\n",
    "    # Polymer-specific structural features, read from the SMILES text itself\n",
    "    features['has_polymer_notation'] = '*' in smiles\n",
    "    features['smiles_length'] = len(smiles)\n",
    "    features['branch_count'] = smiles.count('(')\n",
    "    features['branch_ratio'] = ratio(features['branch_count'], len(smiles))\n",
    "\n",
    "    return features\n",
    "\n",
    "\n",
    "def add_features(df, num_workers=None):\n",
    "    \"\"\"Append RDKit descriptor columns to `df`, computed in worker processes.\n",
    "\n",
    "    Parameters:\n",
    "        df: pandas DataFrame with a 'Smiles' column.\n",
    "        num_workers: Number of worker processes (defaults to the CPU count).\n",
    "\n",
    "    Returns:\n",
    "        A new DataFrame: the columns of `df` followed by one column per\n",
    "        descriptor. Rows whose SMILES could not be analysed keep their original\n",
    "        values and get NaN in every descriptor column.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if `df` has no 'Smiles' column.\n",
    "    \"\"\"\n",
    "    if num_workers is None:\n",
    "        num_workers = multiprocessing.cpu_count()\n",
    "\n",
    "    smiles_list = df['Smiles'].tolist()\n",
    "\n",
    "    # Hand SMILES to the workers in batches; one task per string makes the\n",
    "    # inter-process traffic dominate the (cheap) per-molecule work.\n",
    "    chunksize = max(1, len(smiles_list) // (max(1, num_workers) * 4))\n",
    "\n",
    "    # NOTE(review): the worker function is defined in the notebook itself, which\n",
    "    # presumably only works with the 'fork' start method - confirm before running\n",
    "    # this on a platform that spawns worker processes.\n",
    "    with ProcessPoolExecutor(max_workers=num_workers) as executor:\n",
    "        # executor.map yields results in input order, so tqdm can wrap it directly\n",
    "        features_list = list(tqdm(\n",
    "            executor.map(analyze_polymer_features_rdkit, smiles_list, chunksize=chunksize),\n",
    "            total=len(smiles_list),\n",
    "            desc=\"Computing RDKit descriptors\"))\n",
    "\n",
    "    # Invalid SMILES come back as None; an empty record turns into an all-NaN row\n",
    "    # instead of breaking the DataFrame constructor.\n",
    "    records = [features if features is not None else {} for features in features_list]\n",
    "\n",
    "    # Reuse the caller's index: concat(axis=1) aligns on index labels, so a fresh\n",
    "    # 0..n-1 index would misalign any filtered or re-indexed input frame.\n",
    "    features_df = pd.DataFrame(records, index=df.index)\n",
    "\n",
    "    return pd.concat([df, features_df], axis=1)\n",
    "\n",
    "\n",
    "def get_list_dif(l1, l2):\n",
    "    \"\"\"Return the distinct items of `l1` that are not in `l2`.\n",
    "\n",
    "    Items keep the order of their first appearance in `l1`, so the result is\n",
    "    the same on every run (a plain set difference has no defined order).\n",
    "    Items must be hashable.\n",
    "    \"\"\"\n",
    "    excluded = set(l2)\n",
    "    return [item for item in dict.fromkeys(l1) if item not in excluded]\n",
    "\n",
    "# Usage example:\n",
    "# df_with_features = add_features(df, num_workers=4)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "155598af-79f3-4933-8b5c-1fd11f64b870",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the next cell rebinds `df` to a different CSV, so this load only\n",
    "# matters when that cell is skipped - presumably an alternative dataset; confirm.\n",
    "df = pd.read_csv('/home/jovyan/simson_training_bolgov/regression/PI_Tg_P308K_synth_db_chem.csv').drop(columns=['Unnamed: 0'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c69cc497-9fb6-4f74-96eb-257d7aa4a91a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = 
pd.read_csv('/home/jovyan/simson_training_bolgov/kaggle_comp/train.csv')\n",
    "# add_features() reads the SMILES strings from a column named 'Smiles'.\n",
    "df['Smiles'] = df['SMILES']\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b076c55-d6ef-4780-af97-5fccd5062661",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upper bound on the number of rows sent through descriptor generation.\n",
    "MAX_ROWS = 10_000\n",
    "sample_df = df.iloc[:MAX_ROWS]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96313883-c2ca-4eb8-9ec7-9aaca8dba077",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the unscaled descriptors under their own name so the scaling cell below\n",
    "# always starts from raw values, no matter how often it is re-run.\n",
    "features_raw_df = add_features(sample_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "41c7f85a-ea65-42e5-b315-ef304ba311c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Descriptor columns that get standardised below.\n",
    "# NOTE(review): count_C / ratio_C, count_N / ratio_N and smiles_length are produced\n",
    "# by the feature function but are not listed here, so they stay unscaled - confirm\n",
    "# that this is intentional.\n",
    "selected_features = ['mol_weight', 'exact_mol_weight', 'num_heavy_atoms', 'num_atoms',\n",
    "       'num_bonds', 'num_hbond_donors', 'num_hbond_acceptors',\n",
    "       'num_heteroatoms', 'num_rotatable_bonds', 'num_saturated_rings',\n",
    "       'num_aromatic_rings', 'num_aliphatic_rings', 'ring_count',\n",
    "       'fraction_csp3', 'tpsa', 'polar_surface_area', 'logp', 'crippen_logp',\n",
    "       'crippen_mr', 'kappa1', 'kappa2', 'kappa3', 'chi0v', 'chi1v', 'chi2v',\n",
    "       'balaban_j', 'bertz_ct', 'num_radical_electrons',\n",
    "       'num_valence_electrons',\n",
    "       'count_O', 'ratio_O', 'count_S', 'ratio_S', 'count_P', 'ratio_P',\n",
    "       'count_F', 'ratio_F', 'count_Cl', 'ratio_Cl', 'count_Br', 'ratio_Br',\n",
    "       'count_I', 'ratio_I', 'num_single_bonds', 'ratio_single_bonds',\n",
    "       'num_double_bonds', 'ratio_double_bonds', 'num_triple_bonds',\n",
    "       'ratio_triple_bonds', 'num_aromatic_bonds', 'ratio_aromatic_bonds',\n",
    "       'num_sp_carbons', 'ratio_sp_carbons', 'num_sp2_carbons',\n",
    "       'ratio_sp2_carbons', 'num_sp3_carbons', 'ratio_sp3_carbons',\n",
    "       'num_sp3d_carbons', 'ratio_sp3d_carbons', 'num_sp3d2_carbons',\n",
    "       'ratio_sp3d2_carbons', 'total_formal_charge', 'abs_total_formal_charge',\n",
    "       'max_formal_charge', 'min_formal_charge', 'num_aromatic_atoms',\n",
    "       'aromatic_ratio', 'avg_ring_size', 'max_ring_size', 'min_ring_size',\n",
    "       'num_3_rings', 'num_4_rings', 'num_5_rings', 'num_6_rings',\n",
    "       'num_7_rings', 'num_large_rings', 'has_polymer_notation',\n",
    "       'branch_count', 'branch_ratio']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "fc31605d-cc21-4533-b04e-f8acdaef1a65",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['id', 'SMILES', 'Tg', 'FFV', 'Tc', 'Density', 'Rg', 'Smiles',\n",
       " 'mol_weight', 'exact_mol_weight', 'num_heavy_atoms', 'num_atoms',\n",
       " 'num_bonds', 'num_hbond_donors', 'num_hbond_acceptors',\n",
       " 'num_heteroatoms', 'num_rotatable_bonds', 'num_saturated_rings',\n",
       " 'num_aromatic_rings', 'num_aliphatic_rings', 'ring_count',\n",
       " 'fraction_csp3', 'tpsa', 'polar_surface_area', 'logp', 'crippen_logp',\n",
       " 'crippen_mr', 'kappa1', 'kappa2', 'kappa3', 'chi0v', 'chi1v', 'chi2v',\n",
       " 'balaban_j', 'bertz_ct', 'num_radical_electrons',\n",
       " 'num_valence_electrons', 'count_C', 'ratio_C', 'count_N', 'ratio_N',\n",
       " 'count_O', 'ratio_O', 'count_S', 'ratio_S', 'count_P', 'ratio_P',\n",
       " 'count_F', 'ratio_F', 'count_Cl', 'ratio_Cl', 'count_Br', 'ratio_Br',\n",
       " 'count_I', 'ratio_I', 'num_single_bonds', 'ratio_single_bonds',\n",
       " 'num_double_bonds', 'ratio_double_bonds', 'num_triple_bonds',\n",
       " 'ratio_triple_bonds', 'num_aromatic_bonds', 'ratio_aromatic_bonds',\n",
       " 'num_sp_carbons', 'ratio_sp_carbons', 'num_sp2_carbons',\n",
       " 'ratio_sp2_carbons', 'num_sp3_carbons', 'ratio_sp3_carbons',\n",
       " 'num_sp3d_carbons', 'ratio_sp3d_carbons', 'num_sp3d2_carbons',\n",
       " 'ratio_sp3d2_carbons', 'total_formal_charge', 'abs_total_formal_charge',\n",
       " 'max_formal_charge', 'min_formal_charge', 'num_aromatic_atoms',\n",
       " 'aromatic_ratio', 'avg_ring_size', 'max_ring_size', 'min_ring_size',\n",
       " 'num_3_rings', 'num_4_rings', 'num_5_rings', 'num_6_rings',\n",
       " 'num_7_rings', 'num_large_rings', 'has_polymer_notation',\n",
       " 'smiles_length', 'branch_count', 'branch_ratio'],\n",
       " dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Standardise a copy of the raw descriptors. Scaling `features_df` in place would\n",
    "# make a second run of this cell re-fit every scaler on already-scaled data and\n",
    "# lose the original mean / std stored in `scalers`.\n",
    "# NOTE(review): the scalers are fitted on all rows, before any train/test split -\n",
    "# confirm that this is intended.\n",
    "features_df = features_raw_df.copy()\n",
    "\n",
    "# One fitted scaler per column, in the same order as `selected_features`.\n",
    "scalers = []\n",
    "for col in selected_features:\n",
    "    scaler = StandardScaler()\n",
    "    column_values = features_raw_df[col].to_numpy().reshape(-1, 1)\n",
    "    features_df[col] = scaler.fit_transform(column_values).flatten()\n",
    "    scalers.append(scaler)\n",
    "\n",
    "features_df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2f1a614-0ba7-4a01-9731-532afc1d14e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only descriptors with more than this many distinct values.\n",
    "MIN_UNIQUE_VALUES = 300\n",
    "\n",
    "new_features = [\n",
    "    feature\n",
    "    for feature in selected_features\n",
    "    # dropna=False counts NaN as a value, exactly like len(series.unique())\n",
    "    if features_df[feature].nunique(dropna=False) > MIN_UNIQUE_VALUES\n",
    "]\n",
    "new_features.append('Smiles')\n",
    "print(new_features)\n",
    "len(new_features), len(selected_features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c065d950-7a63-4424-9923-1072d2e2268c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persists the frame with the selected descriptor columns already standardised.\n",
    "features_df.to_csv('7k_w_descriptors.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "069a9021-d440-4bf1-9882-a2af25f2e801",
   "metadata": 
{}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "SMILES | \n", "Tg | \n", "FFV | \n", "Tc | \n", "Density | \n", "Rg | \n", "Smiles | \n", "mol_weight | \n", "exact_mol_weight | \n", "... | \n", "num_3_rings | \n", "num_4_rings | \n", "num_5_rings | \n", "num_6_rings | \n", "num_7_rings | \n", "num_large_rings | \n", "has_polymer_notation | \n", "smiles_length | \n", "branch_count | \n", "branch_ratio | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "87817 | \n", "*CC(*)c1ccccc1C(=O)OCCCCCC | \n", "NaN | \n", "0.374645 | \n", "0.205667 | \n", "NaN | \n", "NaN | \n", "*CC(*)c1ccccc1C(=O)OCCCCCC | \n", "-0.875755 | \n", "-0.875617 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "-0.626991 | \n", "-0.788904 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "26 | \n", "-0.985221 | \n", "-0.813832 | \n", "
1 | \n", "106919 | \n", "*Nc1ccc([C@H](CCC)c2ccc(C3(c4ccc([C@@H](CCC)c5... | \n", "NaN | \n", "0.370410 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*Nc1ccc([C@H](CCC)c2ccc(C3(c4ccc([C@@H](CCC)c5... | \n", "0.651876 | \n", "0.651916 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "-0.626991 | \n", "0.736852 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "82 | \n", "0.336345 | \n", "-0.286141 | \n", "
2 | \n", "388772 | \n", "*Oc1ccc(S(=O)(=O)c2ccc(Oc3ccc(C4(c5ccc(Oc6ccc(... | \n", "NaN | \n", "0.378860 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*Oc1ccc(S(=O)(=O)c2ccc(Oc3ccc(C4(c5ccc(Oc6ccc(... | \n", "2.336573 | \n", "2.336165 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "-0.626991 | \n", "2.644047 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "134 | \n", "1.657910 | \n", "-0.109289 | \n", "
3 | \n", "519416 | \n", "*Nc1ccc(-c2c(-c3ccc(C)cc3)c(-c3ccc(C)cc3)c(N*)... | \n", "NaN | \n", "0.387324 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*Nc1ccc(-c2c(-c3ccc(C)cc3)c(-c3ccc(C)cc3)c(N*)... | \n", "0.417716 | \n", "0.417722 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "-0.626991 | \n", "1.118291 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "79 | \n", "0.556606 | \n", "0.132247 | \n", "
4 | \n", "539187 | \n", "*Oc1ccc(OC(=O)c2cc(OCCCCCCCCCOCC3CCCN3c3ccc([N... | \n", "NaN | \n", "0.355470 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*Oc1ccc(OC(=O)c2cc(OCCCCCCCCCOCC3CCCN3c3ccc([N... | \n", "2.178003 | \n", "2.178499 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "1.501149 | \n", "0.355413 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "118 | \n", "0.556606 | \n", "-0.830501 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
7968 | \n", "2146592435 | \n", "*Oc1cc(CCCCCCCC)cc(OC(=O)c2cccc(C(*)=O)c2)c1 | \n", "NaN | \n", "0.367498 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*Oc1cc(CCCCCCCC)cc(OC(=O)c2cccc(C(*)=O)c2)c1 | \n", "-0.375261 | \n", "-0.375084 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "-0.626991 | \n", "-0.407465 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "44 | \n", "-0.324438 | \n", "0.124891 | \n", "
7969 | \n", "2146810552 | \n", "*C(=O)OCCN(CCOC(=O)c1ccc2c(c1)C(=O)N(c1cccc(N3... | \n", "NaN | \n", "0.353280 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*C(=O)OCCN(CCOC(=O)c1ccc2c(c1)C(=O)N(c1cccc(N3... | \n", "1.284275 | \n", "1.284737 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "1.501149 | \n", "0.736852 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "110 | \n", "1.217388 | \n", "0.008668 | \n", "
7970 | \n", "2147191531 | \n", "*c1cc(C(=O)NCCCCCCCC)cc(N2C(=O)c3ccc(-c4ccc5c(... | \n", "NaN | \n", "0.369411 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*c1cc(C(=O)NCCCCCCCC)cc(N2C(=O)c3ccc(-c4ccc5c(... | \n", "0.329570 | \n", "0.329823 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "1.501149 | \n", "-0.026026 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "73 | \n", "0.336345 | \n", "0.021405 | \n", "
7971 | \n", "2147435020 | \n", "*C=C(*)c1ccccc1C | \n", "261.662355 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*C=C(*)c1ccccc1C | \n", "-1.359802 | \n", "-1.359728 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "-0.626991 | \n", "-0.788904 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "16 | \n", "-1.205481 | \n", "-1.182617 | \n", "
7972 | \n", "2147438299 | \n", "*c1ccc(OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCC... | \n", "NaN | \n", "0.374049 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "*c1ccc(OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCC... | \n", "1.160667 | \n", "1.160653 | \n", "... | \n", "-0.048476 | \n", "-0.069289 | \n", "0.437079 | \n", "-0.407465 | \n", "-0.051542 | \n", "-0.047917 | \n", "0.0 | \n", "72 | \n", "-0.324438 | \n", "-1.005054 | \n", "
7973 rows × 92 columns
\n", "