Commit 3b23ffe
amd64 pmap: reorder IPI send and local TLB flush in TLB invalidations.
Right now the code first flushes all local TLB entries that need to be flushed, then sends the IPI to remote cores, and then spins idle while waiting for their acknowledgements. The VMware article 'Don’t shoot down TLB shootdowns!' observes that the time spent spinning is lost and could be more usefully spent doing the local TLB invalidation. We could use the same invalidation handler for the local TLB as for remote CPUs, but for pmap == curpmap we can typically use INVLPG locally, whereas remote CPUs must use INVPCID, since we cannot control context switches on them. Due to that, keep the local code and provide callbacks to be called from smp_targeted_tlb_shootdown() after the IPIs are fired but before the spin wait starts.

Reviewed by: alc, cem, markj, Anton Rang <rang at acm.org>
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 2 weeks
Differential revision: https://reviews.freebsd.org/D25188
1 parent da21a62 commit 3b23ffe

5 files changed: +127 additions, -57 deletions
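Before reading the diffs, the shape of the change in miniature: the shootdown path used to do the local flush, then send IPIs, then spin for acknowledgements; now it sends the IPIs first and performs the local flush via a callback while the remote CPUs work. A minimal standalone sketch of that reordering, where shootdown(), send_ipi(), and wait_ack() are illustrative stand-ins, not the kernel's API:

/*
 * Sketch: overlap the local TLB flush with remote IPI latency.
 * send_ipi() and wait_ack() are hypothetical stand-ins for
 * ipi_send_cpu() and the generation-count spin loop.
 */
typedef void (*invl_cb_t)(void *arg);

static void send_ipi(int cpu) { (void)cpu; /* fire invalidation IPI */ }
static void wait_ack(int cpu) { (void)cpu; /* spin until cpu acks */ }

static void
shootdown(const int *remote, int n, invl_cb_t local_cb, void *arg)
{
        int i;

        for (i = 0; i < n; i++)         /* 1. signal remote CPUs first */
                send_ipi(remote[i]);
        local_cb(arg);                  /* 2. local flush overlaps remote work */
        for (i = 0; i < n; i++)         /* 3. only now spin for acks */
                wait_ack(remote[i]);
}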

sys/amd64/amd64/pmap.c

Lines changed: 54 additions & 32 deletions
@@ -2591,6 +2591,20 @@ DEFINE_IFUNC(static, void, pmap_invalidate_page_mode, (pmap_t, vm_offset_t))
 	return (pmap_invalidate_page_nopcid);
 }
 
+static void
+pmap_invalidate_page_curcpu_cb(pmap_t pmap, vm_offset_t va,
+    vm_offset_t addr2 __unused)
+{
+
+	if (pmap == kernel_pmap) {
+		invlpg(va);
+	} else {
+		if (pmap == PCPU_GET(curpmap))
+			invlpg(va);
+		pmap_invalidate_page_mode(pmap, va);
+	}
+}
+
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
@@ -2603,16 +2617,8 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));
 
-	sched_pin();
-	if (pmap == kernel_pmap) {
-		invlpg(va);
-	} else {
-		if (pmap == PCPU_GET(curpmap))
-			invlpg(va);
-		pmap_invalidate_page_mode(pmap, va);
-	}
-	smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap);
-	sched_unpin();
+	smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap,
+	    pmap_invalidate_page_curcpu_cb);
 }
 
 /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */
@@ -2688,10 +2694,26 @@ DEFINE_IFUNC(static, void, pmap_invalidate_range_mode, (pmap_t, vm_offset_t,
 	return (pmap_invalidate_range_nopcid);
 }
 
+static void
+pmap_invalidate_range_curcpu_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	vm_offset_t addr;
+
+	if (pmap == kernel_pmap) {
+		for (addr = sva; addr < eva; addr += PAGE_SIZE)
+			invlpg(addr);
+	} else {
+		if (pmap == PCPU_GET(curpmap)) {
+			for (addr = sva; addr < eva; addr += PAGE_SIZE)
+				invlpg(addr);
+		}
+		pmap_invalidate_range_mode(pmap, sva, eva);
+	}
+}
+
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	vm_offset_t addr;
 
 	if (eva - sva >= PMAP_INVLPG_THRESHOLD) {
 		pmap_invalidate_all(pmap);
@@ -2706,19 +2728,8 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_range: invalid type %d", pmap->pm_type));
 
-	sched_pin();
-	if (pmap == kernel_pmap) {
-		for (addr = sva; addr < eva; addr += PAGE_SIZE)
-			invlpg(addr);
-	} else {
-		if (pmap == PCPU_GET(curpmap)) {
-			for (addr = sva; addr < eva; addr += PAGE_SIZE)
-				invlpg(addr);
-		}
-		pmap_invalidate_range_mode(pmap, sva, eva);
-	}
-	smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap);
-	sched_unpin();
+	smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap,
+	    pmap_invalidate_range_curcpu_cb);
 }
 
 static inline void
@@ -2805,6 +2816,14 @@ DEFINE_IFUNC(static, void, pmap_invalidate_all_mode, (pmap_t))
 	return (pmap_invalidate_all_nopcid);
 }
 
+static void
+pmap_invalidate_all_curcpu_cb(pmap_t pmap, vm_offset_t addr1 __unused,
+    vm_offset_t addr2 __unused)
+{
+
+	pmap_invalidate_all_mode(pmap);
+}
+
 void
 pmap_invalidate_all(pmap_t pmap)
 {
@@ -2817,20 +2836,23 @@ pmap_invalidate_all(pmap_t pmap)
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
 
-	sched_pin();
-	pmap_invalidate_all_mode(pmap);
-	smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap);
-	sched_unpin();
+	smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap,
+	    pmap_invalidate_all_curcpu_cb);
+}
+
+static void
+pmap_invalidate_cache_curcpu_cb(pmap_t pmap __unused, vm_offset_t va __unused,
+    vm_offset_t addr2 __unused)
+{
+
+	wbinvd();
 }
 
 void
 pmap_invalidate_cache(void)
 {
 
-	sched_pin();
-	wbinvd();
-	smp_cache_flush();
-	sched_unpin();
+	smp_cache_flush(pmap_invalidate_cache_curcpu_cb);
 }
 
 struct pde_action {

sys/i386/i386/pmap.c

Lines changed: 18 additions & 8 deletions
@@ -1203,6 +1203,13 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
 }
 
 #ifdef SMP
+
+static void
+pmap_curcpu_cb_dummy(pmap_t pmap __unused, vm_offset_t addr1 __unused,
+    vm_offset_t addr2 __unused)
+{
+}
+
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
  *
@@ -1241,7 +1248,7 @@ pmap_invalidate_page_int(pmap_t pmap, vm_offset_t va)
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invlpg(*mask, va, pmap);
+	smp_masked_invlpg(*mask, va, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }
 
@@ -1274,7 +1281,7 @@ pmap_invalidate_range_int(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invlpg_range(*mask, sva, eva, pmap);
+	smp_masked_invlpg_range(*mask, sva, eva, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }
 
@@ -1297,18 +1304,21 @@ pmap_invalidate_all_int(pmap_t pmap)
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invltlb(*mask, pmap);
+	smp_masked_invltlb(*mask, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }
 
 static void
-__CONCAT(PMTYPE, invalidate_cache)(void)
+pmap_invalidate_cache_curcpu_cb(pmap_t pmap __unused,
+    vm_offset_t addr1 __unused, vm_offset_t addr2 __unused)
 {
-
-	sched_pin();
 	wbinvd();
-	smp_cache_flush();
-	sched_unpin();
+}
+
+static void
+__CONCAT(PMTYPE, invalidate_cache)(void)
+{
+	smp_cache_flush(pmap_invalidate_cache_curcpu_cb);
 }
 
 struct pde_action {

sys/i386/i386/vm_machdep.c

Lines changed: 8 additions & 1 deletion
@@ -578,6 +578,12 @@ sf_buf_map(struct sf_buf *sf, int flags)
 }
 
 #ifdef SMP
+static void
+sf_buf_shootdown_curcpu_cb(pmap_t pmap __unused,
+    vm_offset_t addr1 __unused, vm_offset_t addr2 __unused)
+{
+}
+
 void
 sf_buf_shootdown(struct sf_buf *sf, int flags)
 {
@@ -596,7 +602,8 @@ sf_buf_shootdown(struct sf_buf *sf, int flags)
 		CPU_ANDNOT(&other_cpus, &sf->cpumask);
 		if (!CPU_EMPTY(&other_cpus)) {
 			CPU_OR(&sf->cpumask, &other_cpus);
-			smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap);
+			smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap,
+			    sf_buf_shootdown_curcpu_cb);
 		}
 	}
 	sched_unpin();

sys/x86/include/x86_smp.h

Lines changed: 9 additions & 4 deletions
@@ -84,6 +84,9 @@ inthand_t
 	IDTVEC(cpususpend),	/* CPU suspends & waits to be resumed */
 	IDTVEC(rendezvous);	/* handle CPU rendezvous */
 
+typedef void (*smp_invl_cb_t)(struct pmap *, vm_offset_t addr1,
+    vm_offset_t addr2);
+
 /* functions in x86_mp.c */
 void	assign_cpu_ids(void);
 void	cpu_add(u_int apic_id, char boot_cpu);
@@ -103,11 +106,13 @@ void	ipi_cpu(int cpu, u_int ipi);
 int	ipi_nmi_handler(void);
 void	ipi_selected(cpuset_t cpus, u_int ipi);
 void	set_interrupt_apic_ids(void);
-void	smp_cache_flush(void);
-void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap);
+void	smp_cache_flush(smp_invl_cb_t curcpu_cb);
+void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap,
+	    smp_invl_cb_t curcpu_cb);
 void	smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
-	    vm_offset_t endva, struct pmap *pmap);
-void	smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
+	    vm_offset_t endva, struct pmap *pmap, smp_invl_cb_t curcpu_cb);
+void	smp_masked_invltlb(cpuset_t mask, struct pmap *pmap,
+	    smp_invl_cb_t curcpu_cb);
 void	mem_range_AP_init(void);
 void	topo_probe(void);
 void	ipi_send_cpu(int cpu, u_int ipi);

sys/x86/x86/mp_x86.c

Lines changed: 38 additions & 12 deletions
@@ -1676,29 +1676,45 @@ volatile uint32_t smp_tlb_generation;
 #define	read_eflags() read_rflags()
 #endif
 
+/*
+ * Used by pmap to request invalidation of TLB or cache on local and
+ * remote processors.  Mask provides the set of remote CPUs which are
+ * to be signalled with the IPI specified by vector.  The curcpu_cb
+ * callback is invoked on the calling CPU while waiting for remote
+ * CPUs to complete the operation.
+ *
+ * The callback function is called unconditionally on the caller's
+ * underlying processor, even when this processor is not set in the
+ * mask.  So, the callback function must be prepared to handle such
+ * spurious invocations.
+ */
 static void
 smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
-    vm_offset_t addr1, vm_offset_t addr2)
+    vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
 {
 	cpuset_t other_cpus;
 	volatile uint32_t *p_cpudone;
 	uint32_t generation;
 	int cpu;
 
 	/* It is not necessary to signal other CPUs while in the debugger. */
-	if (kdb_active || KERNEL_PANICKED())
+	if (kdb_active || KERNEL_PANICKED()) {
+		curcpu_cb(pmap, addr1, addr2);
 		return;
+	}
+
+	sched_pin();
 
 	/*
 	 * Check for other cpus.  Return if none.
 	 */
 	if (CPU_ISFULLSET(&mask)) {
 		if (mp_ncpus <= 1)
-			return;
+			goto nospinexit;
 	} else {
 		CPU_CLR(PCPU_GET(cpuid), &mask);
 		if (CPU_EMPTY(&mask))
-			return;
+			goto nospinexit;
 	}
 
 	if (!(read_eflags() & PSL_I))
@@ -1722,6 +1738,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
 			ipi_send_cpu(cpu, vector);
 		}
 	}
+	curcpu_cb(pmap, addr1, addr2);
 	while ((cpu = CPU_FFS(&other_cpus)) != 0) {
 		cpu--;
 		CPU_CLR(cpu, &other_cpus);
@@ -1730,26 +1747,35 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
 		ia32_pause();
 	}
 	mtx_unlock_spin(&smp_ipi_mtx);
+	sched_unpin();
+	return;
+
+nospinexit:
+	curcpu_cb(pmap, addr1, addr2);
+	sched_unpin();
 }
 
 void
-smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0,
+		    curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_global++;
 #endif
 	}
 }
 
 void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
+    smp_invl_cb_t curcpu_cb)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0,
+		    curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_page++;
 #endif
@@ -1758,12 +1784,12 @@ smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap)
 
 void
 smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
-    pmap_t pmap)
+    pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {
 
 	if (smp_started) {
 		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap,
-		    addr1, addr2);
+		    addr1, addr2, curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_range++;
 		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
@@ -1772,12 +1798,12 @@ smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
 }
 
 void
-smp_cache_flush(void)
+smp_cache_flush(smp_invl_cb_t curcpu_cb)
 {
 
 	if (smp_started) {
 		smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL,
-		    0, 0);
+		    0, 0, curcpu_cb);
 	}
 }
 
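For a caller of the new interface the contract is simple: hand over a callback that does the calling CPU's share of the work, and make it safe to run even when the local CPU needed no flush, since smp_targeted_tlb_shootdown() invokes it unconditionally (including on the nospinexit path). A hypothetical consumer in the style of the amd64 callbacks above; flush_kva_everywhere() and flush_kva_local_cb() are illustrative names, not part of this commit:

static void
flush_kva_local_cb(pmap_t pmap __unused, vm_offset_t va,
    vm_offset_t addr2 __unused)
{
        /*
         * Runs on the calling CPU while the IPIs are in flight; must
         * tolerate "spurious" invocation per the contract above.
         */
        invlpg(va);
}

static void
flush_kva_everywhere(vm_offset_t va)
{
        /* The local flush happens inside the callback, not before the send. */
        smp_masked_invlpg(all_cpus, va, kernel_pmap, flush_kva_local_cb);
}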