Skip to content

Commit e345db7

Browse files
committed
add selects and textareas in form extraction & submission tutorial
1 parent 52cb6d0 commit e345db7

File tree

2 files changed

+63
-5
lines changed

2 files changed

+63
-5
lines changed

‎web-scraping/extract-and-fill-forms/form_extractor.py‎

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ def get_form_details(form):
2121
including action, method and list of form controls (inputs, etc)"""
2222
details={}
2323
# get the form action (requested URL)
24-
action=form.attrs.get("action").lower()
24+
action=form.attrs.get("action")
25+
if action:
26+
action=action.lower()
2527
# get the form method (POST, GET, DELETE, etc)
2628
# if not specified, GET is the default in HTML
2729
method=form.attrs.get("method", "get").lower()
@@ -36,6 +38,38 @@ def get_form_details(form):
3638
input_value=input_tag.attrs.get("value", "")
3739
# add everything to that list
3840
inputs.append({"type": input_type, "name": input_name, "value": input_value})
41+
for select in form.find_all("select"):
42+
# get the name attribute
43+
select_name=select.attrs.get("name")
44+
# set the type as select
45+
select_type="select"
46+
select_options= []
47+
# the default select value
48+
select_default_value=""
49+
# iterate over options and get the value of each
50+
for select_option in select.find_all("option"):
51+
# get the option value used to submit the form
52+
option_value=select_option.attrs.get("value")
53+
if option_value:
54+
select_options.append(option_value)
55+
if select_option.attrs.get("selected"):
56+
# if 'selected' attribute is set, set this option as default
57+
select_default_value=option_value
58+
if not select_default_value and select_options:
59+
# if the default is not set, and there are options, take the first option as default
60+
select_default_value=select_options[0]
61+
# add the select to the inputs list
62+
inputs.append({"type": select_type, "name": select_name, "values": select_options, "value": select_default_value})
63+
for textarea in form.find_all("textarea"):
64+
# get the name attribute
65+
textarea_name=textarea.attrs.get("name")
66+
# set the type as textarea
67+
textarea_type="textarea"
68+
# get the textarea value
69+
textarea_value=textarea.attrs.get("value", "")
70+
# add the textarea to the inputs list
71+
inputs.append({"type": textarea_type, "name": textarea_name, "value": textarea_value})
72+
3973
# put everything to the resulting dictionary
4074
details["action"] =action
4175
details["method"] =method

‎web-scraping/extract-and-fill-forms/form_submitter.py‎

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from bs4 import BeautifulSoup
2-
from requests_html import HTMLSession
32

43
from pprint import pprint
54
from urllib.parse import urljoin
@@ -10,25 +9,50 @@
109

1110
# get the URL from the command line
1211
url=sys.argv[1]
12+
all_forms=get_all_forms(url)
1313
# get the first form (edit this as you wish)
14-
first_form=get_all_forms(url)[0]
14+
# first_form = get_all_forms(url)[0]
15+
for i, f in enumerate(all_forms, start=1):
16+
form_details=get_form_details(f)
17+
print(f"{i} #")
18+
pprint(form_details)
19+
print("="*50)
20+
21+
choice=int(input("Enter form indice: "))
1522
# extract all form details
16-
form_details=get_form_details(first_form)
23+
form_details=get_form_details(all_forms[choice-1])
1724
pprint(form_details)
1825
# the data body we want to submit
1926
data={}
2027
for input_tag in form_details["inputs"]:
2128
if input_tag["type"] == "hidden":
2229
# if it's hidden, use the default value
2330
data[input_tag["name"]] =input_tag["value"]
31+
elif input_tag["type"] == "select":
32+
for i, option in enumerate(input_tag["values"], start=1):
33+
# iterate over available select options
34+
if option == input_tag["value"]:
35+
print(f"{i} # {option} (default)")
36+
else:
37+
print(f"{i} # {option}")
38+
choice=input(f"Enter the option for the select field '{input_tag['name']}' (1-{i}): ")
39+
try:
40+
choice=int(choice)
41+
except:
42+
# choice invalid, take the default
43+
value=input_tag["value"]
44+
else:
45+
value=input_tag["values"][choice-1]
46+
data[input_tag["name"]] =value
2447
elif input_tag["type"] != "submit":
2548
# all others except submit, prompt the user to set it
2649
value=input(f"Enter the value of the field '{input_tag['name']}' (type: {input_tag['type']}): ")
2750
data[input_tag["name"]] =value
51+
2852

2953
# join the url with the action (form request URL)
3054
url=urljoin(url, form_details["action"])
31-
55+
# pprint(data)
3256
if form_details["method"] == "post":
3357
res=session.post(url, data=data)
3458
elif form_details["method"] == "get":

0 commit comments

Comments
(0)