SE skill test

the living tribunal

Moderator
Staff member
Python:
import requests
from bs4 import BeautifulSoup
from collections import Counter
from typing import List, Tuple

mainstream_sites = ['google.com', 'bing.com', 'facebook.com', 'youtube.com']

def crawl_web(url: str) -> List[str]:
    try:
        response = requests.get(url, timeout=10)  # Timeout so a dead host can't hang the crawl
        response.raise_for_status()  # Check for request errors
        soup = BeautifulSoup(response.content, 'html.parser')
        links = [a['href'] for a in soup.find_all('a', href=True)]
        return links
    except requests.RequestException as e:
        print(f"Error crawling {url}: {e}")
        return []

def filter_mainstream_sites(links: List[str]) -> List[str]:
    # Substring check: drop any link whose URL mentions a mainstream domain
    return [link for link in links if not any(mainstream_site in link for mainstream_site in mainstream_sites)]

def rank_sites(links: List[str]) -> List[Tuple[str, int]]:
    site_counter = Counter(links)
    ranked_sites = site_counter.most_common()  # (url, count) pairs, highest count first
    return ranked_sites

def search_engine(*seed_sites: str) -> List[Tuple[str, int]]:
    all_links = []
    for seed_site in seed_sites:
        links = crawl_web(seed_site)
        filtered_links = filter_mainstream_sites(links)
        all_links.extend(filtered_links)
    ranked_sites = rank_sites(all_links)
    return ranked_sites[:10]  # Return top 10 non-mainstream sites

# Usage example:
seed_sites = ['http://example1.com', 'http://example2.com']
results = search_engine(*seed_sites)
for site, count in results:
    print(f'{site} (count: {count})')
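
One caveat with the version above: find_all('a', href=True) returns href values exactly as they appear in the page, so relative links like /about come back without a scheme or host and can't be re-crawled or counted against absolute URLs. A minimal illustration of the standard-library fix (the same urljoin call fukurou adds below):

Python:
from urllib.parse import urljoin

# Relative hrefs come back as-is from crawl_web above;
# urljoin resolves them against the page they were found on.
print(urljoin('http://example1.com/index.html', '/about'))
# -> http://example1.com/about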
 

fukurou

the supreme coder
ADMIN
Python:
import requests
from bs4 import BeautifulSoup
from collections import Counter
from urllib.parse import urljoin
from typing import List, Tuple

mainstream_sites = ['google.com', 'bing.com', 'facebook.com', 'youtube.com']


def crawl_web(url: str) -> List[str]:
    try:
        response = requests.get(url, timeout=10)  # Timeout so a dead host can't hang the crawl
        response.raise_for_status()  # Check for request errors
        soup = BeautifulSoup(response.content, 'html.parser')
        links = [urljoin(url, a['href']) for a in soup.find_all('a', href=True)]
        return links
    except requests.RequestException as e:
        print(f"Error crawling {url}: {e}")
        return []


def filter_mainstream_sites(links: List[str]) -> List[str]:
    return [link for link in links if not any(mainstream_site in link for mainstream_site in mainstream_sites)]


def rank_sites(links: List[str]) -> List[Tuple[str, int]]:
    site_counter = Counter(links)
    ranked_sites = site_counter.most_common()  # (url, count) pairs, highest count first
    return ranked_sites


def search_engine(*seed_sites: str) -> List[Tuple[str, int]]:
    all_links = []
    for seed_site in seed_sites:
        links = crawl_web(seed_site)
        filtered_links = filter_mainstream_sites(links)
        all_links.extend(filtered_links)
    ranked_sites = rank_sites(all_links)
    return ranked_sites[:10]  # Return top 10 non-mainstream sites


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    seed_sites = ['https://incels.is/forums/inceldom-discussion.2/', 'https://jizz.is/']
    results = search_engine(*seed_sites)
    for site, count in results:
        print(f'{site} (count: {count})')

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
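
A caveat that applies to both versions: filter_mainstream_sites does a plain substring check, so any URL that merely mentions, say, google.com in its path or query string gets dropped, and rank_sites counts full URLs, so two pages on the same site rank separately. A sketch of host-based filtering and domain-level ranking, assuming the same mainstream_sites list; is_mainstream, filter_by_host, and rank_domains are hypothetical names, not part of either post above:

Python:
from collections import Counter
from urllib.parse import urlparse
from typing import List, Tuple

mainstream_sites = ['google.com', 'bing.com', 'facebook.com', 'youtube.com']

def is_mainstream(host: str) -> bool:
    # Match the host itself or any subdomain of it, e.g. 'www.google.com'
    return any(host == site or host.endswith('.' + site) for site in mainstream_sites)

def filter_by_host(links: List[str]) -> List[str]:
    # Compare the parsed hostname rather than the raw URL string, so a URL
    # that only mentions 'google.com' in its query string is not dropped
    return [link for link in links if not is_mainstream(urlparse(link).netloc)]

def rank_domains(links: List[str]) -> List[Tuple[str, int]]:
    # Count by domain so multiple pages on one site pool their votes
    return Counter(urlparse(link).netloc for link in links).most_common()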
 