Welcome To Our Shell

Mister Spy & Souheyl Bypass Shell

Current Path : /proc/thread-self/root/usr/local/lib/python3.8/dist-packages/silverware/

Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64
Upload File :
Current File : //proc/thread-self/root/usr/local/lib/python3.8/dist-packages/silverware/read_table.py

import pandas as pd
import copy
from collections import Counter
from bs4 import BeautifulSoup, Tag
import warnings

from .clone_beautiful_soup_tag import clone_beautiful_soup_tag
from .clean_html_text import clean_html_text
from .find_links import find_links
from .get_table_shape import get_table_shape

def add_to_dataframe(dataframe, row, column, value):
	"""
	Write ``value`` at position (``row``, ``column``), growing the frame if needed.

	Missing rows are added with integer labels continuing from the current
	row count, and missing columns with integer labels continuing from the
	current column count. The new cells are empty (NaN / None).

	:param dataframe: frame to write into; its row labels must be unique,
		because the rows are added with ``DataFrame.reindex``
	:param row: zero-based row position of the target cell
	:param column: zero-based column position of the target cell
	:param value: object to store in the target cell
	:return: the frame holding the value. This is a new object when rows had
		to be added; otherwise it is the frame that was passed in, modified
		in place.
	"""
	num_rows, num_columns = dataframe.shape

	# DataFrame.append was removed in pandas 2.0, and appending one row at a time
	# copies the whole frame on every step; a single reindex adds all missing rows.
	if row >= num_rows:
		new_index = list(dataframe.index) + list(range(num_rows, row + 1))
		dataframe = dataframe.reindex(new_index)

	for new_column in range(num_columns, column + 1):
		dataframe[new_column] = None

	dataframe.iat[row, column] = value
	return dataframe

# Disabled in-module version of get_table_shape, kept only as a bare string
# literal (it is never executed). The get_table_shape that read_table calls is
# the one imported from .get_table_shape at the top of this module.
# The disabled code counts leading rows without any <td> as header rows, the
# remaining non-empty rows as body rows, and takes the largest number of
# cells found in a single row as the column count.
'''
def get_table_shape(table):
	num_columns = 0
	num_header_rows = 0
	num_rows = 0

	is_header = True
	for row in table.find_all("tr"):
		columns = row.find_all(["td", "th"])
		if len(columns) > 0:

			if is_header:
				if row.find('td') is None:
					num_header_rows += 1

				else:
					is_header = False
					num_rows += 1

			else:
				num_rows += 1

			if len(columns) > num_columns:
				num_columns = len(columns)
	return {'num_header_rows': num_header_rows, 'num_rows': num_rows, 'num_columns': num_columns}
'''


def join_html_texts(texts):
	"""
	Join the string items of ``texts`` with single spaces and clean the result.

	Items that are not ``str`` instances are left out. The joined text is
	passed through ``clean_html_text`` and that function's result is returned.

	:param texts: iterable of values, of which only the strings are used
	:return: whatever ``clean_html_text`` returns for the joined string
	"""
	only_strings = (item for item in texts if isinstance(item, str))
	joined = ' '.join(only_strings)
	return clean_html_text(joined)


def read_table(table, parse_links=False, base_url=None):
	"""
	Convert an HTML table into a pandas DataFrame.

	Rows are header rows until the first ``<tr>`` that contains a ``<td>``;
	every row from that one on is a body row. A cell with ``colspan`` and/or
	``rowspan`` has its content repeated in every grid position it covers.
	The texts of all header rows are joined per column to form the column
	names; names that occur more than once get a running number appended
	(``"name 1"``, ``"name 2"``, ...).

	:param table: BeautifulSoup element holding the table; the work is done on
		the object returned by ``clone_beautiful_soup_tag`` (presumably a copy,
		so that the ``<br>`` substitution does not touch the caller's tree)
	:param parse_links: if true, body cells are passed to ``find_links``; a
		cell with exactly one link stores that link, a cell with several links
		stores the whole result, a cell without links stores its cleaned text
	:param base_url: forwarded to ``find_links`` as ``base``
	:return: DataFrame of the body rows with a fresh 0..n-1 row index
	:raises ValueError: if a ``colspan``/``rowspan`` attribute is not an integer
	"""
	table = clone_beautiful_soup_tag(table)

	# Turn <br> into a newline so line breaks survive the text extraction
	for elem in table.find_all(["br"]):
		elem.replace_with(elem.text + "\n")

	table_shape = get_table_shape(table=table)

	# Pre-size both frames; add_to_dataframe grows them when a span reaches further
	dataframe = pd.DataFrame(index=range(0, table_shape['num_rows']), columns=range(0, table_shape['num_columns']))
	header = pd.DataFrame(index=range(0, table_shape['num_header_rows']), columns=range(0, table_shape['num_columns']))

	# skip_index[c] > 0 means column c of the next row is occupied by a cell
	# that started in an earlier row (rowspan); the number is the rows left
	skip_index = [0 for _ in range(0, table_shape['num_columns'])]

	row_counter = 0
	header_row_counter = 0

	is_header = True
	for row in table.find_all("tr"):
		if is_header:
			is_header = row.find('td') is None

		# Taken for every row, also one without cells: such a row still uses up
		# one row of each running rowspan, and the update after the cell loop
		# must never see a missing or stale copy.
		this_skip_index = list(skip_index)

		col_counter = 0
		previous_colspan = 0  # width of the cell handled just before this one
		for col in row.find_all(["td", "th"]):

			# Determine cell dimensions
			colspan = col.get("colspan")
			rowspan = col.get("rowspan")
			num_columns_in_cell = 1 if colspan is None else int(colspan)
			num_rows_in_cell = 1 if rowspan is None else int(rowspan)

			# Move past the previous cell, then past columns taken by cells from rows above
			col_counter += previous_colspan
			previous_colspan = num_columns_in_cell
			while col_counter < len(this_skip_index) and this_skip_index[col_counter] > 0:
				col_counter += 1

			# Get cell contents; links are only extracted for body cells
			if parse_links and not is_header:
				links = find_links(elements=col, base=base_url)
				if len(links) == 0:
					cell_data = clean_html_text(col, replace_images=True)
				elif len(links) == 1:
					cell_data = links[0]
				else:
					cell_data = links
			else:
				cell_data = clean_html_text(col, replace_images=True)

			# Insert the data into every grid position of a merged cell
			for row_num in range(num_rows_in_cell):
				for column_num in range(num_columns_in_cell):
					if is_header:
						header = add_to_dataframe(
							dataframe=header, row=header_row_counter + row_num, column=col_counter + column_num, value=cell_data
						)
					else:
						dataframe = add_to_dataframe(
							dataframe=dataframe, row=row_counter + row_num, column=col_counter + column_num, value=cell_data
						)

			# Record column skipping index for every column the cell covers, not
			# only the first one, so the rows below leave the whole merged area alone
			if num_rows_in_cell > 1:
				last_column = col_counter + num_columns_in_cell - 1
				if last_column >= len(this_skip_index):
					this_skip_index.extend([0] * (last_column + 1 - len(this_skip_index)))
				for column_num in range(num_columns_in_cell):
					this_skip_index[col_counter + column_num] = num_rows_in_cell

		# Adjust row counter
		if is_header:
			header_row_counter += 1
		else:
			row_counter += 1

		# Adjust column skipping index: one row of every running rowspan is used up
		skip_index = [i - 1 if i > 0 else i for i in this_skip_index]

	# Spans can widen one frame but not the other; pad both to the same width
	# so that the column names below match the body's column count
	if header.shape[1] != dataframe.shape[1]:
		total_columns = max(header.shape[1], dataframe.shape[1])
		header = header.reindex(columns=range(total_columns))
		dataframe = dataframe.reindex(columns=range(total_columns))

	# Build column names from the header rows and number the duplicates
	columns = [join_html_texts(header[col].values) for col in header.columns]
	column_name_counter = Counter(columns)
	columns_with_number = []
	column_numbers = {}
	for column in columns:
		if column_name_counter[column] > 1:
			column_numbers[column] = column_numbers.get(column, 0) + 1
			columns_with_number.append(f'{column} {column_numbers[column]}')
		else:
			columns_with_number.append(column)

	dataframe.columns = columns_with_number
	return dataframe.reset_index(drop=True)


def read_tables(tables, parse_links=False, base_url=None, if_error='ignore'):
	"""
	Read one table, or every table of a (possibly nested) iterable of tables.

	:param tables: a single BeautifulSoup/Tag object or an iterable of them;
		the items of an iterable are handled recursively with the same rules
	:param parse_links: forwarded to ``read_table``
	:param base_url: forwarded to ``read_table``
	:param if_error: what to do when ``read_table`` fails for a table:
		``'ignore'`` drops the table silently, a value starting with
		``'warn'`` emits the error message as a warning and drops the table,
		any other value re-raises the error
	:return: for a single table the result of ``read_table``, or None if it
		failed and the error was not re-raised; for an iterable a list of the
		results that are not None
	:raises TypeError: if ``tables`` is a ``str`` or ``bytes`` object
	"""
	# A string is iterable and each of its characters is again a string, so it
	# would recurse until the interpreter's recursion limit is hit
	if isinstance(tables, (str, bytes)):
		raise TypeError('tables must be a parsed BeautifulSoup/Tag object or an iterable of them, not raw text')

	if isinstance(tables, (BeautifulSoup, Tag)):
		try:
			return read_table(table=tables, parse_links=parse_links, base_url=base_url)
		except Exception as e:
			if if_error == 'ignore':
				return None
			if if_error.startswith('warn'):
				warnings.warn(str(e))
				return None
			# bare raise keeps the original traceback intact
			raise

	result = [
		read_tables(tables=table, parse_links=parse_links, base_url=base_url, if_error=if_error)
		for table in tables
	]
	return [x for x in result if x is not None]

bypass 1.0, Devloped By El Moujahidin (the source has been moved and devloped)
Email: contact@elmoujehidin.net bypass 1.0, Devloped By El Moujahidin (the source has been moved and devloped) Email: contact@elmoujehidin.net