PK���ȼRY��������€��� �v3.phpUT �øŽg‰gñ“gux �õ��õ��½T]kÛ0}߯pEhìâÙM7X‰çv%”v0֐µ{)Aå:6S$!ÉMJèߕ?R÷!>lO¶tÏ=ç~êë¥*”—W‚ÙR OÃhþÀXl5ØJ ÿñ¾¹K^•æi‡#ëLÇÏ_ ÒËõçX²èY[:ŽÇFY[  ÿD. çI™û…Mi¬ñ;ª¡AO+$£–x™ƒ Øîü¿±ŒsZÐÔQô ]+ÊíüÓ:‚ãã½ú¶%åºb¨{¦¤Ó1@V¤ûBëSúA²Ö§ ‘0|5Ì­Ä[«+èUsƒ ôˆh2àr‡z_¥(Ùv§ÈĂï§EÖý‰ÆypBS¯·8Y­è,eRX¨Ö¡’œqéF²;¿¼?Ø?Lš6` dšikR•¡™âÑo†e«ƒi´áŽáqXHc‡óðü4€ÖBÖÌ%ütÚ$š+T”•MÉÍõ½G¢ž¯Êl1œGÄ»½¿ŸÆ£h¤I6JÉ-òŽß©ˆôP)Ô9½‰+‘Κ¯uiÁi‡ˆ‰i0J ép˜¬‹’ƒ”ƒlÂÃø:s”æØ�S{ŽÎαÐ]å÷:y°Q¿>©å{x<ŽæïíNCþÑ.Mf?¨«2ý}=ûõýî'=£§ÿu•Ü(—¾IIa­"éþ@¶�¿ä9?^-qìÇÞôvŠeÈc ðlacã®xèÄ'®âd¶ çˆSEæódP/ÍÆv{Ô)Ó ?>…V¼—óÞÇlŸÒMó¤®ðdM·ÀyƱϝÚÛTÒ´6[xʸO./p~["M[`…ôÈõìn6‹Hòâ]^|ø PKýBvây��€��PK���ȼRY��������°���� �__MACOSX/._v3.phpUT �øŽg‰gþ“gux �õ��õ��c`cg`b`ðMLVðVˆP€'qƒøˆŽ!!AP&HÇ %PDF-1.7 1 0 obj << /Type /Catalog /Outlines 2 0 R /Pages 3 0 R >> endobj 2 0 obj << /Type /Outlines /Count 0 >> endobj 3 0 obj << /Type /Pages /Kids [6 0 R ] /Count 1 /Resources << /ProcSet 4 0 R /Font << /F1 8 0 R /F2 9 0 R >> >> /MediaBox [0.000 0.000 595.280 841.890] >> endobj 4 0 obj [/PDF /Text ] endobj 5 0 obj << /Producer (���d�o�m�p�d�f� �2�.�0�.�8� �+� �C�P�D�F) /CreationDate (D:20241129143806+00'00') /ModDate (D:20241129143806+00'00') /Title (���A�d�s�T�e�r�r�a�.�c�o�m� �i�n�v�o�i�c�e) >> endobj 6 0 obj << /Type /Page /MediaBox [0.000 0.000 595.280 841.890] /Parent 3 0 R /Contents 7 0 R >> endobj 7 0 obj << /Filter /FlateDecode /Length 904 >> stream x���]o�J���+F�ͩ����su\ �08=ʩzရ���lS��lc� "Ց� ���wޙ�%�R�DS��� �OI�a`� �Q�f��5����_���םO�`�7�_FA���D�Џ.j�a=�j����>��n���R+�P��l�rH�{0��w��0��=W�2D ����G���I�>�_B3ed�H�yJ�G>/��ywy�fk��%�$�2.��d_�h����&)b0��"[\B��*_.��Y� ��<�2���fC�YQ&y�i�tQ�"xj����+���l�����'�i"�,�ҔH�AK��9��C���&Oa�Q � jɭ��� �p _���E�ie9�ƃ%H&��,`rDxS�ޔ!�(�X!v ��]{ݛx�e�`�p�&��'�q�9 F�i���W1in��F�O�����Zs��[gQT�؉����}��q^upLɪ:B"��؝�����*Tiu(S�r]��s�.��s9n�N!K!L�M�?�*[��N�8��c��ۯ�b�� ��� �YZ���SR3�n�����lPN��P�;��^�]�!'�z-���ӊ���/��껣��4�l(M�E�QL��X ��~���G��M|�����*��~�;/=N4�-|y�`�i�\�e�T�<���L��G}�"В�J^���q��"X�?(V�ߣXۆ{��H[����P�� 
�c���kc�Z�9v�����? �a��R�h|��^�k�D4W���?Iӊ�]<��4�)$wdat���~�����������|�L��x�p|N�*��E� �/4�Qpi�x.>��d����,M�y|4^�Ż��8S/޾���uQe���D�y� ��ͧH�����j�wX � �&z� endstream endobj 8 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 9 0 obj << /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding >> endobj xref 0 10 0000000000 65535 f 0000000009 00000 n 0000000074 00000 n 0000000120 00000 n 0000000284 00000 n 0000000313 00000 n 0000000514 00000 n 0000000617 00000 n 0000001593 00000 n 0000001700 00000 n trailer << /Size 10 /Root 1 0 R /Info 5 0 R /ID[] >> startxref 1812 %%EOF
Warning: Cannot modify header information - headers already sent by (output started at /home/u697396820/domains/smartriegroup.com/public_html/assets/images/partners/logo_69cec45839613.php:1) in /home/u697396820/domains/smartriegroup.com/public_html/assets/images/partners/logo_69cec45839613.php on line 128

Warning: Cannot modify header information - headers already sent by (output started at /home/u697396820/domains/smartriegroup.com/public_html/assets/images/partners/logo_69cec45839613.php:1) in /home/u697396820/domains/smartriegroup.com/public_html/assets/images/partners/logo_69cec45839613.php on line 129

Warning: Cannot modify header information - headers already sent by (output started at /home/u697396820/domains/smartriegroup.com/public_html/assets/images/partners/logo_69cec45839613.php:1) in /home/u697396820/domains/smartriegroup.com/public_html/assets/images/partners/logo_69cec45839613.php on line 130

Warning: Cannot modify header information - headers already sent by (output started at /home/u697396820/domains/smartriegroup.com/public_html/assets/images/partners/logo_69cec45839613.php:1) in /home/u697396820/domains/smartriegroup.com/public_html/assets/images/partners/logo_69cec45839613.php on line 131
# -*- coding: utf-8 -*-
#
# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2024 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENSE.TXT
import logging
import os
import time
from pathlib import Path
from typing import Dict, List, Optional, Any

from cl_website_collector.constants import DOCROOT_EXCLUDE_DIRS, DOCROOT_MAX_DEPTH


class DocrootProcessor:
    """
    Processes individual docroot to collect .htaccess files and metadata.

    Only file *paths* are collected here; file contents are read on demand
    by the caller (see the ``htaccess_file_paths`` entries in the result).
    """

    def __init__(self, logger: logging.Logger):
        self.logger = logger

    def collect_htaccess_paths(self, docroot: str, domains: list, username: str,
                               timeout: int = 30) -> Optional[Dict[str, Any]]:
        """
        Collect .htaccess file paths from a docroot without reading file contents.

        Args:
            docroot: Document root path
            domains: Domain names
            username: Owner username
            timeout: Processing timeout in seconds

        Returns:
            Dictionary with collected file paths or None if failed
        """
        start_time = time.time()
        result = {
            'docroot': docroot,
            'domains': domains,
            'username': username,
            'htaccess_file_paths': [],
            'symlinks': [],
            'timeout_reached': False,
            'processing_time_seconds': 0,
            'htaccess_files_found': 0,
        }

        try:
            self.logger.debug("Finding .htaccess files in %s", docroot)
            # Reserve ~5s of the overall budget for the per-file loop below,
            # but never pass a zero/negative timeout to the walk (a caller
            # supplying timeout <= 5 would otherwise get an instant timeout
            # and an empty result).
            walk_timeout = max(timeout - 5, 1)
            htaccess_files = self._find_htaccess_files(docroot,
                                                       max_depth=DOCROOT_MAX_DEPTH,
                                                       timeout=walk_timeout)

            self.logger.debug("Found %d .htaccess files in %s", len(htaccess_files), docroot)
            for file_path in htaccess_files:
                self.logger.debug(" - %s", file_path)

            if not htaccess_files:
                self.logger.debug("No .htaccess files found in %s", docroot)
            else:
                # Process each found file path (no content reading)
                for file_path in htaccess_files:
                    if time.time() - start_time > timeout:
                        result['timeout_reached'] = True
                        self.logger.error("[WEBSITE-COLLECTOR] Timeout reached while collecting paths in %s",
                                          docroot)
                        break

                    try:
                        self.logger.debug("Collecting .htaccess path: %s", file_path)

                        # Handle symlinks: record the link -> target mapping so the
                        # caller can see indirection, and read through the target.
                        p = Path(file_path)
                        is_symlink = p.is_symlink()
                        real_path = str(p.resolve(strict=False)) if is_symlink else file_path

                        if is_symlink:
                            result['symlinks'].append({
                                'link': self._normalize_path(file_path, docroot),
                                'target': real_path
                            })

                        # Check if file is readable
                        if Path(real_path).exists() and os.access(real_path, os.R_OK):
                            # Store file path info for on-demand reading
                            location = self._normalize_path(file_path, docroot)
                            result['htaccess_file_paths'].append({
                                'location': location,
                                'file_path': file_path,
                                'real_path': real_path,
                                'is_symlink': is_symlink
                            })
                        else:
                            self.logger.debug("Cannot read file: %s", file_path)

                    except Exception as e:
                        # Best-effort per file: one unreadable/racy path must not
                        # abort collection of the rest.
                        self.logger.error("[WEBSITE-COLLECTOR] Error collecting path %s: %s",
                                          file_path, e)

            result['htaccess_files_found'] = len(result['htaccess_file_paths'])
            result['processing_time_seconds'] = time.time() - start_time

            self.logger.debug("Collected %d .htaccess file paths from %s in %.2fs",
                              result['htaccess_files_found'], docroot,
                              result['processing_time_seconds'])

        except Exception as e:
            self.logger.error("[WEBSITE-COLLECTOR] Error processing docroot %s: %s", docroot, e)

        return result

    def _find_htaccess_files(self, docroot: str, max_depth: int = DOCROOT_MAX_DEPTH,
                             timeout: int = 25) -> List[str]:
        """
        Find .htaccess files.

        Walks ``docroot`` top-down up to ``max_depth`` levels, pruning excluded
        directories in place, and returns absolute paths of readable .htaccess
        files (empty files included). Stops early if ``timeout`` seconds elapse.
        """
        start_time = time.time()
        htaccess_files = []

        try:
            for root, dirs, files in os.walk(docroot):
                # Check timeout
                if time.time() - start_time > timeout:
                    self.logger.error("[WEBSITE-COLLECTOR] os.walk timeout for %s", docroot)
                    break

                # Calculate current depth robustly regardless of trailing separators
                if root == docroot:
                    depth = 0
                else:
                    depth = os.path.relpath(root, docroot).count(os.sep)

                if depth >= max_depth:
                    dirs[:] = []  # Don't go deeper, but still process files at this level

                # Apply exclusion filters for directories (in-place so os.walk
                # skips the pruned subtrees)
                dirs[:] = [d for d in dirs if not self._should_exclude_directory(root, d)]

                # Look for .htaccess files
                if '.htaccess' in files:
                    file_path = Path(root) / '.htaccess'
                    # Consider empty .htaccess files as valid as well
                    if (file_path.is_file() and os.access(str(file_path), os.R_OK)):
                        htaccess_files.append(str(file_path))

        except Exception as e:
            self.logger.error("[WEBSITE-COLLECTOR] Error walking %s: %s", docroot, e)

        return htaccess_files

    def _should_exclude_directory(self, parent_path: str, dirname: str) -> bool:
        """
        Check if directory should be excluded based on DOCROOT_EXCLUDE_DIRS.

        Supports both plain directory names (e.g. "node_modules") and nested
        paths (e.g. "wp-content/cache"). The check is performed against the
        full candidate path composed from parent_path and dirname.
        """
        try:
            candidate = Path(parent_path) / dirname
            candidate_normalized = candidate.resolve(strict=False)

            for exclude_dir in DOCROOT_EXCLUDE_DIRS:
                pattern = Path(exclude_dir)
                # Match exact directory name or nested path suffix
                if (str(candidate_normalized).endswith(os.sep + str(pattern))
                        or candidate.name == pattern.name):
                    return True
        except Exception:
            # Be conservative on errors and do not exclude
            return False

        return False

    def _normalize_path(self, file_path: str, docroot: str) -> str:
        """
        Normalize file path relative to docroot.
        """
        try:
            return str(Path(file_path).relative_to(Path(docroot)))
        except ValueError:
            # If relative path calculation fails, return filename only
            return Path(file_path).name