### Information Gathering - Web Edition
# Skills Assessment
I have to set Target(s): `{ip}:{port}`, and of course the vhost goes into `/etc/hosts` first:

- `inlanefreight.htb`
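A minimal way to add the mapping (the `10.129.x.x` below is a placeholder, since the target IP changes per spawn):

```bash
# Append the target mapping (placeholder IP; use the spawned target's IP)
echo "10.129.x.x inlanefreight.htb" | sudo tee -a /etc/hosts
```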
When I interact with http://inlanefreight.htb:56908/, the non-standard port has to be included in the URL, and for some tools on the command line as well:
`gobuster vhost -u http://inlanefreight.htb:56908 -w /usr/share/seclists/Discovery/DNS/subdomains-top1million-110000.txt --append-domain`
Found: web1337.inlanefreight.htb:56908 Status: 200 [Size: 104]
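Before browsing it, the new vhost can be confirmed with curl by overriding the Host header, and mapped in `/etc/hosts` the same way as before:

```bash
# Same IP and port, but present the discovered vhost in the Host header
curl -s -H "Host: web1337.inlanefreight.htb" http://inlanefreight.htb:56908/
# Map it for the browser as well (placeholder IP again)
echo "10.129.x.x web1337.inlanefreight.htb" | sudo tee -a /etc/hosts
```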
Visiting http://web1337.inlanefreight.htb:56908/admin_h1dd3n/ greets me with:

> Welcome to web1337 admin site
> The admin panel is currently under maintenance, but the API is still accessible with the key `e963d863ee0e82ba7080fbf558ca0d3f`
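The maintenance page only discloses the key itself, not how it is consumed, so the endpoint and parameter names below are assumptions; these are just two common patterns for passing an API key:

```bash
# Hypothetical: key as a query parameter (endpoint name assumed)
curl "http://web1337.inlanefreight.htb:56908/api?key=e963d863ee0e82ba7080fbf558ca0d3f"
# Hypothetical: key in a custom header (header name assumed)
curl -H "X-API-Key: e963d863ee0e82ba7080fbf558ca0d3f" "http://web1337.inlanefreight.htb:56908/api"
```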
To map out everything else on the vhost, I used a Scrapy-based recon crawler:

```python
import scrapy
import json
import re
from urllib.parse import urlparse
from scrapy.crawler import CrawlerProcess
from scrapy.downloadermiddlewares.offsite import OffsiteMiddleware


class FlexibleOffsiteMiddleware(OffsiteMiddleware):
    """Custom middleware to handle URLs that include a port number."""

    def should_follow(self, request, spider):
        if not self.host_regex:
            return True
        # Compare against the hostname only, with the port stripped
        hostname = urlparse(request.url).hostname or ''
        return bool(self.host_regex.search(hostname))


class ReconCrawler(scrapy.Spider):
    """Main spider class for web reconnaissance."""

    name = 'recon_crawler'

    def __init__(self, start_url, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.start_urls = [start_url]
        self.allowed_domains = [urlparse(start_url).hostname]
        # Sets deduplicate findings across pages
        self.crawled_data = {
            'emails': set(),
            'links': set(),
            'external_files': set(),
            'js_files': set(),
            'form_fields': set(),
            'images': set(),
            'videos': set(),
            'audio': set(),
            'comments': set(),
        }

    def parse(self, response):
        # Handle text-based responses only
        if "text" in response.headers.get('Content-Type', b'').decode():
            self.extract_data(response)
            # crawl_links() is a generator, so its requests must be yielded
            yield from self.crawl_links(response)

    def extract_data(self, response):
        """Extract various elements from the response."""
        # Find emails using regex
        self.crawled_data['emails'].update(
            re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', response.text)
        )
        # Collect various types of assets and data
        self.crawled_data['js_files'].update(
            response.css('script::attr(src)').getall()
        )
        # Non-capturing group so .re() returns the whole URL, not just the extension
        self.crawled_data['external_files'].update(
            response.css('link::attr(href), a::attr(href)').re(r'.*\.(?:css|pdf|docx?|xlsx?)$')
        )
        self.crawled_data['form_fields'].update(
            response.css('input::attr(name), textarea::attr(name), select::attr(name)').getall()
        )
        self.crawled_data['images'].update(
            response.css('img::attr(src)').getall()
        )
        self.crawled_data['videos'].update(
            response.css('video::attr(src), source::attr(src)').getall()
        )
        self.crawled_data['audio'].update(
            response.css('audio::attr(src), source::attr(src)').getall()
        )
        self.crawled_data['comments'].update(
            response.xpath('//comment()').getall()
        )

    def crawl_links(self, response):
        """Identify links to follow and process recursively."""
        for href in response.css('a::attr(href)').getall():
            # Skip email links
            if href.startswith('mailto:'):
                continue
            full_url = response.urljoin(href)
            parsed_url = urlparse(full_url)
            # Follow internal links only
            if parsed_url.hostname == urlparse(response.url).hostname:
                self.crawled_data['links'].add(full_url)
                yield response.follow(full_url, self.parse)

    def closed(self, reason):
        """Handle post-crawl actions."""
        # Convert sets to lists so the data is JSON-serializable
        for key in self.crawled_data:
            self.crawled_data[key] = sorted(self.crawled_data[key])
        # Save results as JSON
        with open('crawl_results.json', 'w') as file:
            json.dump(self.crawled_data, file, indent=4)
        self.log("Results saved to crawl_results.json")


def start_crawler(start_url):
    """Entry point for starting the crawler."""
    process = CrawlerProcess(settings={
        'LOG_LEVEL': 'INFO',
        'DOWNLOADER_MIDDLEWARES': {
            # Replace the stock offsite middleware with the port-tolerant version
            'scrapy.downloadermiddlewares.offsite.OffsiteMiddleware': None,
            '__main__.FlexibleOffsiteMiddleware': 500,
        }
    })
    process.crawl(ReconCrawler, start_url=start_url)
    process.start()


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description="Recon Crawler")
    parser.add_argument('start_url', help="URL to start the web crawling process")
    args = parser.parse_args()
    start_crawler(args.start_url)
```
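Running it against the vhost (I saved the script as `recon_crawler.py`; the filename is my choice, not from the assessment) and skimming the output:

```bash
python3 recon_crawler.py http://web1337.inlanefreight.htb:56908/
# Pull out the interesting fields from the saved results (assumes jq is installed)
jq '.emails, .links' crawl_results.json
```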
`mail@inlanefreight.htb` is easy to spot in the results, and so is `yummu.web1337.inlanefreight.htb`. Thanks for reading!