Welcome To Our Shell

Mister Spy & Souheyl Bypass Shell

Current Path : /proc/thread-self/root/usr/local/lib/python3.8/dist-packages/silverware/htmltext/

Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64
Upload File :
Current File : //proc/thread-self/root/usr/local/lib/python3.8/dist-packages/silverware/htmltext/HtmlText.py

from pandas import DataFrame
from pandas.api.types import is_numeric_dtype

from .BaseHtmlText import BaseHtmlText
from .StructuredText import StructuredText


class HtmlText(BaseHtmlText):
	def __init__(self, obj, extract_tags=False, ignore_function=None, log=False, num_jobs=1):
		super().__init__(
			obj=obj,
			extract_tags=extract_tags,
			_id='1',
			ignore_function=ignore_function,
			depth=0,
			ids=None,
			log=log,
			num_jobs=num_jobs
		)
		self._data = None
		self._font_sizes = None
		self._full_data = None
		self._font_size_data = {}

	@classmethod
	def _get_state_attribute_names(cls):
		super_attribute_names = super()._get_state_attribute_names()
		other_attribute_names = {
			'_data': None,
			'_font_size_data': {},
			'_font_sizes': None,
			'_full_data': None,
			'_logs': [],
			'_ids': {}
		}
		return {**super_attribute_names, **other_attribute_names}

	def __setstate__(self, state):
		super().__setstate__(state=state)
		self._set_logs(logs=self._logs)
		self._set_ids(ids=self._ids)

	def __getstate__(self):
		state = super().__getstate__()
		return state

	@property
	def data(self):
		"""
		produces a data file of all the texts
		:rtype: DataFrame
		"""
		if self._data is None:
			self._data = DataFrame.from_records(self.records)
		return self._data

	@property
	def font_size_data(self):
		"""
		produces an aggregate data of font sizes
		:rtype: DataFrame
		"""
		return self.get_font_size_data()

	def get_font_size_data(self, split_by_weight=False):
		"""
		produces an aggregate data of font sizes
		:rtype: DataFrame
		"""
		key = (split_by_weight)
		if self._font_size_data is None:
			self._font_size_data = {}
		if key not in self._font_size_data:
			possible_group_columns = ['font_family', 'font_size']
			if split_by_weight:
				possible_group_columns = ['font_weight'] + possible_group_columns

			group_columns = [col for col in possible_group_columns if col in self.data.columns]
			data = self.data[['num_characters', 'num_strings'] + group_columns].copy()
			data['count'] = 1

			df = data.groupby(by=group_columns).agg(['median', 'sum']).reset_index(drop=False)

			df.columns = df.columns.map(lambda x: '_'.join(x).strip('_'))
			df['character_to_string_ratio'] = df['num_characters_sum'] / df['num_strings_sum']
			self._font_size_data[key] = df.rename(columns={'count_sum': 'count'}).drop(columns=['count_median'])
		return self._font_size_data[key]

	@property
	def font_sizes(self):
		"""
		:rtype: int or float
		"""

		if self._font_sizes is None:
			df = self.get_font_size_data()
			# body font size is the one with the largest character to string ratio
			max_character_to_string_ratio = df.character_to_string_ratio.max()

			body_font_size = df[df.character_to_string_ratio == max_character_to_string_ratio].iloc[0]['font_size']

			# title font sizes are the ones larger than body
			title_font_sizes = list(df[df.font_size > body_font_size]['font_size'])
			self._font_sizes = {'body': body_font_size, 'title_min': min(title_font_sizes), 'titles': title_font_sizes}
		return self._font_sizes

	@property
	def structured_text(self):
		"""
		:rtype: StructuredText
		"""
		return StructuredText(html_text=self)

	@property
	def full_data(self):
		"""
		:rtype: DataFrame
		"""
		if self._full_data is None:
			self._full_data = self.data.merge(right=self.get_font_size_data())
		return self._full_data

	def display_statistics(self):
		max_column_length = max([len(str(column)) for column in self.full_data.columns])
		result = []
		for column in self.full_data.columns:
			c = str(column)
			if is_numeric_dtype(self.full_data[column]):
				_min = round(self.full_data[column].min(), 2)
				_max = round(self.full_data[column].max(), 2)
				_median = round(self.full_data[column].median(), 2)
				_mean = round(self.full_data[column].mean(), 2)

				result.append(
					f'{c.ljust(max_column_length)} - min: {_min}, max: {_max}, mean: {_mean}, median: {_median}'
				)

			else:
				most_common = '", "'.join(self.full_data[column].value_counts()[:5].index.tolist())

				result.append(
					f'{c.ljust(max_column_length)} - common values: "{most_common}"'
				)
		print('\n'.join(result))

	def filter(self, dictionary, break_lines=True):
		"""
		returns a list of strings with the texts that fit the criteria in the dictionary
		:param dict dictionary: a dictionary that provides instructions about how to filter the data
		:rtype: list[str]
		"""
		result = self.full_data
		for key, value in dictionary.items():
			if isinstance(value, str) and value.startswith(('<', '>', '=', '!=')):

				if value.startswith(('>=', '<=', '!=', '==')):
					actual_value = value[2:].lstrip()
					if is_numeric_dtype(result[key]):
						actual_value = float(actual_value)

					if value.startswith('>='):
						result = result[result[key] >= actual_value]

					elif value.startswith('<='):
						result = result[result[key] <= actual_value]

					elif value.startswith('!='):
						result = result[result[key] != actual_value]

					elif value.startswith('=='):
						result = result[result[key] == actual_value]

				elif value.startswith(('>', '<', '=')):
					actual_value = value[1:].lstrip()
					if is_numeric_dtype(result[key]):
						actual_value = float(actual_value)

					if value.startswith('='):
						result = result[result[key] == actual_value]

					elif value.startswith('<'):
						result = result[result[key] < actual_value]

					elif value.startswith('>'):
						result = result[result[key] > actual_value]

			else:
				result = result[result[key] == value]

		if break_lines:
			return [y for x in list(result.text) for y in x.split('\n')]

		else:
			return list(result.text)

bypass 1.0, Devloped By El Moujahidin (the source has been moved and devloped)
Email: contact@elmoujehidin.net bypass 1.0, Devloped By El Moujahidin (the source has been moved and devloped) Email: contact@elmoujehidin.net