👨‍💻 dev creamers of the corn

development

the living tribunal

Moderator
Staff member
moderator
Python:
import requests
from bs4 import BeautifulSoup
from queue import Queue
from typing import List, Set, Tuple, Optional
from urllib.parse import urljoin

def fetch_page(url: str) -> Optional[str]:
    try:
        # Timeout so a dead server cannot hang the crawler
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.text
        return None
    except requests.RequestException:
        return None

def extract_links(html: str, base_url: str) -> Set[str]:
    soup = BeautifulSoup(html, 'html.parser')
    links = set()
    for tag in soup.find_all('a', href=True):
        # Resolve relative links against the current page URL
        link = urljoin(base_url, tag['href'])
        if link.startswith('http'):
            links.add(link)
    return links

def keyword_found(html: str, keywords: List[str]) -> bool:
    return any(keyword in html for keyword in keywords)

def negative_keyword_found(html: str, negative_keywords: List[str]) -> bool:
    return any(keyword in html for keyword in negative_keywords)

def web_crawler(seed_sites: List[str], keywords: List[str], negative_keywords: List[str], max_depth: int = 2) -> None:
    visited: Set[str] = set()
    queue: Queue[Tuple[str, int]] = Queue()
    
    for seed in seed_sites:
        queue.put((seed, 0))
    
    while not queue.empty():
        current_url, depth = queue.get()
        if depth > max_depth or current_url in visited:
            continue
        
        html = fetch_page(current_url)
        if html:
            if keyword_found(html, keywords) and not negative_keyword_found(html, negative_keywords):
                print(f"Found a match: {current_url}")
                for link in extract_links(html, current_url):
                    if link not in visited:
                        queue.put((link, depth + 1))
        
        visited.add(current_url)

# Example usage:
seed_sites = ['https://example.com']
keywords = ['keyword1', 'keyword2']
negative_keywords = ['negative1', 'negative2']
web_crawler(seed_sites, keywords, negative_keywords)
 

the living tribunal

Moderator
Staff member
moderator
Python:
import requests
from bs4 import BeautifulSoup
from queue import Queue
from typing import List, Set, Tuple, Optional
from urllib.parse import urljoin

def fetch_page(url: str) -> Optional[str]:
    try:
        # Timeout so a dead server cannot hang the crawler
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.text
        return None
    except requests.RequestException:
        return None

def extract_links(html: str, base_url: str) -> Set[str]:
    soup = BeautifulSoup(html, 'html.parser')
    links = set()
    for tag in soup.find_all('a', href=True):
        # Resolve relative links against the current page URL
        link = urljoin(base_url, tag['href'])
        if link.startswith('http'):
            links.add(link)
    return links

def keyword_found(html: str, keywords: List[str]) -> bool:
    return any(keyword in html for keyword in keywords)

def negative_keyword_found(html: str, negative_keywords: List[str]) -> bool:
    return any(keyword in html for keyword in negative_keywords)

def web_crawler(seed_sites: List[str], keywords: List[str], negative_keywords: List[str], max_depth: int = 2) -> None:
    visited: Set[str] = set()
    queue: Queue[Tuple[str, int]] = Queue()
    
    for seed in seed_sites:
        queue.put((seed, 0))
    
    while not queue.empty():
        current_url, depth = queue.get()
        if depth > max_depth or current_url in visited:
            continue
        visited.add(current_url)

        html = fetch_page(current_url)
        if html:
            # Skip pages that contain any negative keyword
            if negative_keyword_found(html, negative_keywords):
                continue
            # An empty keyword list matches every page
            if not keywords or keyword_found(html, keywords):
                print(f"Found a match: {current_url}")
                for link in extract_links(html, current_url):
                    if link not in visited:
                        queue.put((link, depth + 1))

# Example usage:
seed_sites = ['https://example.com']
keywords = []  # Empty keyword list
negative_keywords = ['negative1', 'negative2']
web_crawler(seed_sites, keywords, negative_keywords)
 

the living tribunal

Moderator
Staff member
moderator
Python:
import random
from typing import List

def get_random_combination(strings: List[str], n: int) -> str:
    if n > len(strings):
        return "Not enough items to choose from"
    random_items = random.sample(strings, n)
    return ''.join(random_items)

# Example usage:
strings = ['apple', 'banana', 'cherry', 'date']
n = 2
random_combination = get_random_combination(strings, n)
print(f"Random Combination: {random_combination}")
 

the living tribunal

Moderator
Staff member
moderator
Python:
import random
from typing import List

def concat_with_random_url_ending(base_str: str) -> str:
    typical_endings = ['.com', '.org', '.net', '.io', '.co', '.edu', '.gov', '.info', '.biz']
    random_ending = random.choice(typical_endings)
    return f"{base_str}{random_ending}"

# Example usage:
base_str = 'example'
result = concat_with_random_url_ending(base_str)
print(f"Concatenated URL: {result}")
 

the living tribunal

Moderator
Staff member
moderator
Python:
import requests

def check_url_exists(url: str) -> bool:
    try:
        # HEAD request with a timeout; any 2xx status counts as "exists"
        response = requests.head(url, allow_redirects=True, timeout=5)
        return response.status_code // 100 == 2
    except requests.RequestException:
        return False

# Example usage:
url = 'https://www.example.com'
exists = check_url_exists(url)
print(exists)  # demo output only; the function itself prints nothing
 
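A sketch of how the last two helpers might be combined: generate a candidate domain with concat_with_random_url_ending and probe it with check_url_exists. This assumes both functions from the posts above are defined in the same module.

Python:
# Assumes concat_with_random_url_ending() and check_url_exists() from the
# posts above are already in scope.
candidates = [f"https://{concat_with_random_url_ending('example')}" for _ in range(5)]
live_urls = [url for url in candidates if check_url_exists(url)]
print(live_urls)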

fukurou

the supreme coder
ADMIN
Python:
    # Requires `import requests` and `from urllib.parse import urlparse` at module level;
    # this is a method from a larger class, hence the @staticmethod decorator.
    @staticmethod
    def check_url_operational(url: str) -> bool:
        # Ensure the URL has a scheme (http or https)
        parsed_url = urlparse(url)
        if not parsed_url.scheme:
            url = 'http://' + url
            parsed_url = urlparse(url)

        # Ensure the URL has a netloc (domain)
        if not parsed_url.netloc:
            url = 'http://www.' + url

        try:
            response = requests.get(url, allow_redirects=True, timeout=10)
            return response.status_code // 100 == 2
        except requests.RequestException:
            return False
 

fukurou

the supreme coder
ADMIN
Python:
from AXPython import AXCmdBreaker, RegexUtil
from LivinGrimoire23 import Brain, Skill
import webbrowser


class DiBrowser(Skill):
    def __init__(self):
        super().__init__()
        self.cmdBreaker: AXCmdBreaker = AXCmdBreaker("surf")
        self._str1: str = ""
        self.regexUtil = RegexUtil()

    def input(self, ear: str, skin: str, eye: str):
        if len(ear) == 0:
            return
        # Grab whatever follows the "surf" trigger word
        self._str1 = self.cmdBreaker.extractCmdParam(ear)
        if len(self._str1) > 0:
            # Keep the parameter only if it looks like a domain (e.g. example.com)
            temp: str = self.regexUtil.extractRegex(r"^([a-zA-Z0-9]+(\.[a-zA-Z0-9]+)+.*)$", self._str1)
            if len(temp) > 0:
                webbrowser.open(temp)  # open it in the default browser
                self.setSimpleAlg("done")
            self._str1 = ""


def add_DLC_skills(brain: Brain):
    brain.add_logical_skill(DiBrowser())
 

fukurou

the supreme coder
ADMIN
Python:
import random
from typing import List

def concat_combinations(list1: List[str], list2: List[str], list3: List[str]) -> List[str]:
    combinations = []
    for item1 in list1:
        for item2 in list2:
            for item3 in list3:
                combinations.append(item1 + item2 + item3)
    random.shuffle(combinations)
    return combinations

# Example usage
list1 = ["a", "b"]
list2 = ["1", "2"]
list3 = ["X", "Y"]

result = concat_combinations(list1, list2, list3)
print(result)
 
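Putting the pieces of the thread together, a sketch: concat_combinations generates candidate site names, concat_with_random_url_ending turns them into URLs, and the result is fed to web_crawler as seed sites. The word lists below are made up for illustration, and the three functions are assumed to be in scope from the earlier posts.

Python:
# Sketch only: concat_combinations(), concat_with_random_url_ending() and
# web_crawler() are assumed to be in scope from the earlier posts.
parts_a = ['tech', 'data']    # made-up word lists for illustration
parts_b = ['forge', 'hub']
parts_c = ['zone', 'lab']

candidate_names = concat_combinations(parts_a, parts_b, parts_c)
seed_sites = ['https://' + concat_with_random_url_ending(name) for name in candidate_names]

web_crawler(seed_sites, keywords=['keyword1'], negative_keywords=['negative1'])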

owly

Legend of Darkness
Staff member
Combat Coder
commence documenting!!!!!!!!!!!!!

the sliding device is operational!
 